# Load project helper functions (merging_data, binomial_prep, glmnet_binomial,
# knn_binomial, rf_binomial, gbm_binomial, roc_curve, aggregate_taxa, ...).
# NOTE(review): fread() is also used below — presumably data.table is loaded
# inside custom_functions.R; confirm.
source("custom_functions.R")
Importing ASV, taxa and metadata tables for both Czech and Norway samples.
Czech
# Czech (IKEM) cohort: read ASV count table, taxonomy table and sample metadata.
path = "../../data/analysis_ready_data/ikem/"
asv_tab_ikem <- as.data.frame(fread(file.path(path,"asv_table_ikem.csv"),
check.names = FALSE))
taxa_tab_ikem <- as.data.frame(fread(file.path(path,"taxa_table_ikem.csv"),
check.names = FALSE))
metadata_ikem <- as.data.frame(fread(file.path(path,"metadata_ikem.csv"),
check.names = FALSE))
Norway
# Norwegian cohort: same three tables as for the Czech cohort.
path = "../../data/analysis_ready_data/norway/"
asv_tab_norway <- as.data.frame(fread(file.path(path,"asv_table_norway.csv"),
check.names = FALSE))
taxa_tab_norway <- as.data.frame(fread(file.path(path,"taxa_table_norway.csv"),
check.names = FALSE))
metadata_norway <- as.data.frame(fread(file.path(path,"metadata_norway.csv"),
check.names = FALSE))
TODO: compute and report read-count statistics (per sample and per cohort) before and after merging.
Merging two countries based on the different matrices - Ileum, Colon.
Terminal ileum
# Merge the two cohorts for the terminal ileum ("TI") segment.
# merging_data() returns a list: [[1]] ASV table, [[2]] taxa table,
# [[3]] metadata (see the unpacking below).
ileum_data <- merging_data(asv_tab_1=asv_tab_ikem,
asv_tab_2=asv_tab_norway,
taxa_tab_1=taxa_tab_ikem,
taxa_tab_2=taxa_tab_norway,
metadata_1=metadata_ikem,
metadata_2=metadata_norway,
segment="TI",Q="Q1")
Removing 1498 ASV(s)
Removing 1834 ASV(s)
Merging at ASV level
Finding inconsistencies in taxonomy, trying to keep the ones that have better taxonomy assignment
# Unpack the merged terminal-ileum tables.
ileum_asv_tab <- ileum_data[[1]]
ileum_taxa_tab <- ileum_data[[2]]
ileum_metadata <- ileum_data[[3]]
Colon
# Merge the two cohorts for the colon segment (same helper as for the ileum).
colon_data <- merging_data(asv_tab_1=asv_tab_ikem,
asv_tab_2=asv_tab_norway,
taxa_tab_1=taxa_tab_ikem,
taxa_tab_2=taxa_tab_norway,
metadata_1=metadata_ikem,
metadata_2=metadata_norway,
segment="colon",Q="Q1")
Removing 739 ASV(s)
Removing 266 ASV(s)
Merging at ASV level
Finding inconsistencies in taxonomy, trying to keep the ones that have better taxonomy assignment
# Unpack the merged colon tables.
colon_asv_tab <- colon_data[[1]]
colon_taxa_tab <- colon_data[[2]]
colon_metadata <- colon_data[[3]]
# Settings for the terminal-ileum models.
segment <- "terminal_ileum"
# Results directory for this question (Q1). NOTE(review): this reuses the
# name `path`, which previously pointed at the input data directories.
path <- "../results/Q1/models_overfitting_check"
model <- "enet"
level <- "ASV"
# First comparison: pre-transplant patients vs healthy controls.
group <- c("pre_ltx", "healthy")
comparison_name <- paste0(group[1], " vs ", group[2])
model_name <- paste(comparison_name, level, segment)
# prepare the data
# binomial_prep() subsets to the two levels in `group` and transforms counts
# for ML (usage = "ml_clr" — presumably CLR transform; confirm in helper).
# The "Removing ... ASV(s)" lines are its console output in this transcript.
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
ileum_taxa_tab,
ileum_metadata,
group, usage="ml_clr")
Removing 1598 ASV(s)
Removing 146 ASV(s)
# fit the model
# Elastic-net binomial classifier with "atypboot" resampling (N = 10) and
# optimism-corrected performance (overfitting_check = TRUE).
enet_model <- glmnet_binomial(filt_ileum_uni_data,
sample_method = "atypboot",
outcome="Group",
N=10,
reuse=FALSE,
file=model_name,
Q="Q1",
overfitting_check = TRUE)
# ROC curve
roc_c <- roc_curve(enet_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
# Result accumulators, keyed by model_name, filled across comparisons below.
models_summ <- list()
models_cm <- list()
betas <- list()
roc_cs <- list()
models_summ[[model_name]] <- enet_model$model_summary
models_cm[[model_name]] <- enet_model$conf_matrices$original
roc_cs[[model_name]] <- enet_model$kfold_rocobjs
betas[[model_name]] <- as.matrix(enet_model$betas)
# see the results
enet_model$model_summary %>% t()
[,1]
alpha 0.4000000
lambda 0.3016707
auc 0.5000000
auc_czech 0.5000000
auc_no 0.5000000
auc_optimism_corrected 0.4906897
auc_optimism_corrected_CIL 0.4032509
auc_optimism_corrected_CIU 0.5612145
accuracy 0.5104895
accuracy_czech NaN
accuracy_no 0.4512195
accuracy_optimism_corrected 0.4670757
accuracy_optimism_corrected_CIL 0.3776515
accuracy_optimism_corrected_CIU 0.5437075
# confusion matrices: overall ($original), Czech-only ($czech), Norway-only ($no)
enet_model$conf_matrices
$original
0
0 73 0
1 70 0
$czech
0
0 36 0
1 25 0
$no
0
0 37 0
1 45 0
# render the model plot and the ROC curve
enet_model$plot
roc_c
# Second comparison: pre- vs post-transplant patients, ASV level.
group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)
# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
ileum_taxa_tab,
ileum_metadata,
group,
usage="ml_clr")
Removing 979 ASV(s)
Removing 68 ASV(s)
# fit the model
enet_model <- glmnet_binomial(filt_ileum_uni_data,
sample_method = "atypboot",
outcome="Group",
N=10,
reuse=FALSE,
file=model_name,
Q="Q1",
overfitting_check = TRUE)
# ROC
roc_c <- roc_curve(enet_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
models_summ[[model_name]] <- enet_model$model_summary
models_cm[[model_name]] <- enet_model$conf_matrices$original
roc_cs[[model_name]] <- enet_model$kfold_rocobjs
betas[[model_name]] <- as.matrix(enet_model$betas)
# see the results
enet_model$model_summary %>% t()
[,1]
alpha 0.0000000
lambda 92.0694517
auc 0.9081781
auc_czech 0.9133510
auc_no 0.9038583
auc_optimism_corrected 0.4794630
auc_optimism_corrected_CIL 0.3607320
auc_optimism_corrected_CIU 0.5973844
accuracy 0.6634615
accuracy_czech NaN
accuracy_no 0.6219512
accuracy_optimism_corrected 0.5998475
accuracy_optimism_corrected_CIL 0.5258619
accuracy_optimism_corrected_CIU 0.6511905
# confusion matrices: overall ($original), Czech-only ($czech), Norway-only ($no)
enet_model$conf_matrices
$original
0
0 138 0
1 70 0
$czech
0
0 87 0
1 39 0
$no
0
0 51 0
1 31 0
# render the model plot and the ROC curve
enet_model$plot
roc_c
# Third comparison: post-transplant patients vs healthy controls, ASV level.
group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)
# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
ileum_taxa_tab,
ileum_metadata,
group,
usage="ml_clr")
Removing 641 ASV(s)
Removing 104 ASV(s)
# fit the model
enet_model <- glmnet_binomial(filt_ileum_uni_data,
sample_method = "atypboot",
outcome="Group",
N=10,
reuse=FALSE,
file=model_name,
Q="Q1",
overfitting_check = TRUE)
# ROC
roc_c <- roc_curve(enet_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
models_summ[[model_name]] <- enet_model$model_summary
models_cm[[model_name]] <- enet_model$conf_matrices$original
roc_cs[[model_name]] <- enet_model$kfold_rocobjs
betas[[model_name]] <- as.matrix(enet_model$betas)
# see the results
enet_model$model_summary %>% t()
[,1]
alpha 0.0000000
lambda 104.9051811
auc 0.8153663
auc_czech 0.8154040
auc_no 0.7917638
auc_optimism_corrected 0.5463519
auc_optimism_corrected_CIL 0.4727740
auc_optimism_corrected_CIU 0.5936274
accuracy 0.6540284
accuracy_czech NaN
accuracy_no 0.5416667
accuracy_optimism_corrected 0.5973663
accuracy_optimism_corrected_CIL 0.4692958
accuracy_optimism_corrected_CIU 0.6834416
# confusion matrices: overall ($original), Czech-only ($czech), Norway-only ($no)
enet_model$conf_matrices
$original
1
0 73 0
1 138 0
$czech
1
0 40 0
1 99 0
$no
1
0 33 0
1 39 0
# render the model plot and the ROC curve
enet_model$plot
roc_c
# Switch to genus level: aggregate ASV counts by genus.
# aggregate_taxa() returns [[1]] aggregated count table, [[2]] taxa table.
level="genus"
Aggregate taxa
genus_data <- aggregate_taxa(ileum_asv_tab,
ileum_taxa_tab,
taxonomic_level = level)
ileum_genus_tab <- genus_data[[1]]
ileum_genus_taxa_tab <- genus_data[[2]]
# Same elastic-net pipeline at genus level: pre_ltx vs healthy.
group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)
# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
ileum_genus_taxa_tab,
ileum_metadata,
group,
usage="ml_clr")
Removing 84 ASV(s)
Removing 10 ASV(s)
# fit the model
enet_model <- glmnet_binomial(filt_ileum_uni_data,
sample_method = "atypboot",
outcome="Group",
N=10,
reuse=FALSE,
file=model_name,
Q="Q1",
overfitting_check = TRUE)
# ROC
roc_c <- roc_curve(enet_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
models_summ[[model_name]] <- enet_model$model_summary
models_cm[[model_name]] <- enet_model$conf_matrices$original
roc_cs[[model_name]] <- enet_model$kfold_rocobjs
betas[[model_name]] <- as.matrix(enet_model$betas)
# see the results
enet_model$model_summary %>% t()
[,1]
alpha 0.0000000
lambda 117.0690961
auc 0.7978474
auc_czech 0.8950216
auc_no 0.7339286
auc_optimism_corrected 0.5261700
auc_optimism_corrected_CIL 0.4175710
auc_optimism_corrected_CIU 0.6201754
accuracy 0.5104895
accuracy_czech NaN
accuracy_no 0.4878049
accuracy_optimism_corrected 0.4905480
accuracy_optimism_corrected_CIL 0.4242232
accuracy_optimism_corrected_CIU 0.5501540
# confusion matrices: overall ($original), Czech-only ($czech), Norway-only ($no)
enet_model$conf_matrices
$original
0
0 73 0
1 70 0
$czech
0
0 33 0
1 28 0
$no
0
0 40 0
1 42 0
# render the model plot and the ROC curve
enet_model$plot
roc_c
# Genus level: pre_ltx vs post_ltx.
group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)
# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
ileum_genus_taxa_tab,
ileum_metadata,
group,
usage="ml_clr")
Removing 46 ASV(s)
Removing 6 ASV(s)
# fit the model
enet_model <- glmnet_binomial(filt_ileum_uni_data,
sample_method = "atypboot",
outcome="Group", N=10,
reuse=FALSE,
file=model_name,
Q="Q1",
overfitting_check = TRUE)
# ROC
roc_c <- roc_curve(enet_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
models_summ[[model_name]] <- enet_model$model_summary
models_cm[[model_name]] <- enet_model$conf_matrices$original
roc_cs[[model_name]] <- enet_model$kfold_rocobjs
betas[[model_name]] <- as.matrix(enet_model$betas)
# see the results
enet_model$model_summary %>% t()
[,1]
alpha 0.8000000
lambda 0.1118729
auc 0.5000000
auc_czech 0.5000000
auc_no 0.5000000
auc_optimism_corrected 0.5630938
auc_optimism_corrected_CIL 0.4964636
auc_optimism_corrected_CIU 0.6150775
accuracy 0.6634615
accuracy_czech NaN
accuracy_no 0.6219512
accuracy_optimism_corrected 0.6072686
accuracy_optimism_corrected_CIL 0.5339850
accuracy_optimism_corrected_CIU 0.6505456
# confusion matrices: overall ($original), Czech-only ($czech), Norway-only ($no)
enet_model$conf_matrices
$original
0
0 138 0
1 70 0
$czech
0
0 87 0
1 39 0
$no
0
0 51 0
1 31 0
# render the model plot and the ROC curve
enet_model$plot
roc_c
# Genus level: post_ltx vs healthy.
group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)
# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
ileum_genus_taxa_tab,
ileum_metadata,group,
usage="ml_clr")
Removing 45 ASV(s)
Removing 2 ASV(s)
# fit the model
enet_model <- glmnet_binomial(filt_ileum_uni_data,
sample_method = "atypboot",
outcome="Group",
N=10,
reuse=FALSE,
file=model_name,
Q="Q1",
overfitting_check = TRUE)
# ROC
roc_c <- roc_curve(enet_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
models_summ[[model_name]] <- enet_model$model_summary
models_cm[[model_name]] <- enet_model$conf_matrices$original
roc_cs[[model_name]] <- enet_model$kfold_rocobjs
betas[[model_name]] <- as.matrix(enet_model$betas)
# see the results
enet_model$model_summary %>% t()
[,1]
alpha 0.0000000
lambda 96.2015279
auc 0.7387334
auc_czech 0.7366162
auc_no 0.6876457
auc_optimism_corrected 0.6247644
auc_optimism_corrected_CIL 0.5590581
auc_optimism_corrected_CIU 0.6976382
accuracy 0.6540284
accuracy_czech NaN
accuracy_no 0.5416667
accuracy_optimism_corrected 0.6404057
accuracy_optimism_corrected_CIL 0.5724764
accuracy_optimism_corrected_CIU 0.6953307
# confusion matrices: overall ($original), Czech-only ($czech), Norway-only ($no)
enet_model$conf_matrices
$original
1
0 73 0
1 138 0
$czech
1
0 40 0
1 99 0
$no
1
0 33 0
1 39 0
# render the model plot and the ROC curve
enet_model$plot
roc_c
# Combine all per-comparison elastic-net summaries for this segment into one
# table and write it to the results directory.
# NOTE(review): `path` here is the results directory set earlier
# ("../results/Q1/models_overfitting_check"), not the input data directory.
# fixed = TRUE matches `segment` as a literal string, not as a regex.
models_summ_df_ileum <- do.call(rbind,
models_summ[grep(segment, names(models_summ), value = TRUE, fixed = TRUE)])
write.csv(models_summ_df_ileum,
file.path(path, paste0("elastic_net_", segment, ".csv")))
# Supplementary models (KNN, RF, GBM) are collected in one nested list:
# supplements_models[[result_type]][[model]][[model_name]].
supplements_models <- list()
model="knn"
level="ASV"
pre_ltx vs healthy
# KNN classifier, ASV level: pre_ltx vs healthy.
group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)
# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
ileum_taxa_tab,
ileum_metadata,
group, usage="ml_clr")
Removing 1598 ASV(s)
Removing 146 ASV(s)
# fit the model
knn_model <- knn_binomial(filt_ileum_uni_data,
sample_method = "atypboot",
outcome="Group",
N=10,
reuse=FALSE,
file=model_name,
Q="Q1",
overfitting_check = TRUE)
# ROC curve
roc_c <- roc_curve(knn_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["knn_model"]][[model_name]] <- knn_model$model_summary
supplements_models[["roc_cs"]][["knn_model"]][[model_name]] <- knn_model$kfold_rocobjs
# see the results
knn_model$model_summary %>% t()
[,1]
k 18.0000000
auc 0.6246575
auc_optimism_corrected 0.4915453
auc_optimism_corrected_CIL 0.3685407
auc_optimism_corrected_CIU 0.5807214
accuracy 0.5804196
accuracy_optimism_corrected 0.4580934
accuracy_optimism_corrected_CIL 0.3686731
accuracy_optimism_corrected_CIU 0.5555556
# render the ROC curve
roc_c
pre_ltx vs post_ltx
# KNN classifier, ASV level: pre_ltx vs post_ltx.
group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)
# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
ileum_taxa_tab,
ileum_metadata,
group, usage="ml_clr")
Removing 979 ASV(s)
Removing 68 ASV(s)
# fit the model
knn_model <- knn_binomial(filt_ileum_uni_data,
sample_method = "atypboot",
outcome="Group",
N=10,
reuse=FALSE,
file=model_name,
Q="Q1",
overfitting_check = TRUE)
# ROC curve
roc_c <- roc_curve(knn_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["knn_model"]][[model_name]] <- knn_model$model_summary
supplements_models[["roc_cs"]][["knn_model"]][[model_name]] <- knn_model$kfold_rocobjs
# see the results
knn_model$model_summary %>% t()
[,1]
k 29.0000000
auc 0.6160455
auc_optimism_corrected 0.5418588
auc_optimism_corrected_CIL 0.4747331
auc_optimism_corrected_CIU 0.5945267
accuracy 0.6586538
accuracy_optimism_corrected 0.6311507
accuracy_optimism_corrected_CIL 0.5332550
accuracy_optimism_corrected_CIU 0.7125676
# render the ROC curve
roc_c
post_ltx vs healthy
# KNN classifier, ASV level: post_ltx vs healthy.
group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)
# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
ileum_taxa_tab,
ileum_metadata,
group, usage="ml_clr")
Removing 641 ASV(s)
Removing 104 ASV(s)
# fit the model
knn_model <- knn_binomial(filt_ileum_uni_data,
sample_method = "atypboot",
outcome="Group",
N=10,
reuse=FALSE,
file=model_name,
Q="Q1",
overfitting_check = TRUE)
# ROC curve
roc_c <- roc_curve(knn_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["knn_model"]][[model_name]] <- knn_model$model_summary
supplements_models[["roc_cs"]][["knn_model"]][[model_name]] <- knn_model$kfold_rocobjs
# see the results
knn_model$model_summary %>% t()
[,1]
k 29.0000000
auc 0.5878003
auc_optimism_corrected 0.5187169
auc_optimism_corrected_CIL 0.3925478
auc_optimism_corrected_CIU 0.6139842
accuracy 0.6635071
accuracy_optimism_corrected 0.5976041
accuracy_optimism_corrected_CIL 0.5291545
accuracy_optimism_corrected_CIU 0.6686495
# render the ROC curve
roc_c
# Genus-level aggregation for the KNN models.
# NOTE(review): recomputes the same objects as the earlier aggregation for the
# elastic-net section — appears redundant but keeps sections self-contained.
level="genus"
Aggregate taxa
genus_data <- aggregate_taxa(ileum_asv_tab,
ileum_taxa_tab,
taxonomic_level = level)
ileum_genus_tab <- genus_data[[1]]
ileum_genus_taxa_tab <- genus_data[[2]]
pre_ltx vs healthy
# KNN classifier, genus level: pre_ltx vs healthy.
group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)
# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
ileum_genus_taxa_tab,
ileum_metadata,
group,
usage="ml_clr")
Removing 84 ASV(s)
Removing 10 ASV(s)
# fit the model
knn_model <- knn_binomial(filt_ileum_uni_data,
sample_method = "atypboot",
outcome="Group",
N=10,
reuse=FALSE,
file=model_name,
Q="Q1",
overfitting_check = TRUE)
# ROC curve
roc_c <- roc_curve(knn_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["knn_model"]][[model_name]] <- knn_model$model_summary
supplements_models[["roc_cs"]][["knn_model"]][[model_name]] <- knn_model$kfold_rocobjs
# see the results
knn_model$model_summary %>% t()
[,1]
k 20.0000000
auc 0.6411937
auc_optimism_corrected 0.4397675
auc_optimism_corrected_CIL 0.3224986
auc_optimism_corrected_CIU 0.5695415
accuracy 0.5384615
accuracy_optimism_corrected 0.4693208
accuracy_optimism_corrected_CIL 0.3540816
accuracy_optimism_corrected_CIU 0.5338255
# render the ROC curve
roc_c
pre_ltx vs post_ltx
# KNN classifier, genus level: pre_ltx vs post_ltx.
group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)
# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
ileum_genus_taxa_tab,
ileum_metadata,
group,
usage="ml_clr")
Removing 46 ASV(s)
Removing 6 ASV(s)
# fit the model
knn_model <- knn_binomial(filt_ileum_uni_data,
sample_method = "atypboot",
outcome="Group",
N=10,
reuse=FALSE,
file=model_name,
Q="Q1",
overfitting_check = TRUE)
# ROC curve
roc_c <- roc_curve(knn_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["knn_model"]][[model_name]] <- knn_model$model_summary
supplements_models[["roc_cs"]][["knn_model"]][[model_name]] <- knn_model$kfold_rocobjs
# see the results
knn_model$model_summary %>% t()
[,1]
k 30.0000000
auc 0.5883540
auc_optimism_corrected 0.4775349
auc_optimism_corrected_CIL 0.4039989
auc_optimism_corrected_CIU 0.5404422
accuracy 0.6634615
accuracy_optimism_corrected 0.6082219
accuracy_optimism_corrected_CIL 0.5222152
accuracy_optimism_corrected_CIU 0.7109578
# render the ROC curve
roc_c
post_ltx vs healthy
# KNN classifier, genus level: post_ltx vs healthy.
group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)
# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
ileum_genus_taxa_tab,
ileum_metadata,
group,
usage="ml_clr")
Removing 45 ASV(s)
Removing 2 ASV(s)
# fit the model
knn_model <- knn_binomial(filt_ileum_uni_data,
sample_method = "atypboot",
outcome="Group",
N=10,
reuse=FALSE,
file=model_name,
Q="Q1",
overfitting_check = TRUE)
# ROC curve
roc_c <- roc_curve(knn_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["knn_model"]][[model_name]] <- knn_model$model_summary
supplements_models[["roc_cs"]][["knn_model"]][[model_name]] <- knn_model$kfold_rocobjs
# see the results
knn_model$model_summary %>% t()
[,1]
k 28.0000000
auc 0.6282509
auc_optimism_corrected 0.5030252
auc_optimism_corrected_CIL 0.3861708
auc_optimism_corrected_CIU 0.6083495
accuracy 0.6682464
accuracy_optimism_corrected 0.5814859
accuracy_optimism_corrected_CIL 0.4716971
accuracy_optimism_corrected_CIU 0.6906985
# render the ROC curve
roc_c
# Random-forest classifier, ASV level.
model="rf"
level="ASV"
pre_ltx vs healthy
group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)
# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
ileum_taxa_tab,
ileum_metadata,
group, usage="ml_clr")
Removing 1598 ASV(s)
Removing 146 ASV(s)
# fit the model
rf_model <- rf_binomial(filt_ileum_uni_data,
sample_method = "atypboot",
outcome="Group",
N=10,
reuse=FALSE,
file=model_name,
Q="Q1",
overfitting_check = TRUE)
# ROC curve
roc_c <- roc_curve(rf_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["rf_model"]][[model_name]] <- rf_model$model_summary
supplements_models[["roc_cs"]][["rf_model"]][[model_name]] <- rf_model$kfold_rocobjs
# see the results
rf_model$model_summary %>% t()
[,1]
mtry "61"
splitrule "gini"
min.node.size "5"
auc "1"
auc_optimism_corrected "0.4301035"
auc_optimism_corrected_CIL "0.3408448"
auc_optimism_corrected_CIU "0.5647091"
accuracy "1"
accuracy_optimism_corrected "0.4269609"
accuracy_optimism_corrected_CIL "0.3492692"
accuracy_optimism_corrected_CIU "0.5487759"
# render the ROC curve
roc_c
pre_ltx vs post_ltx
# Random forest, ASV level: pre_ltx vs post_ltx.
group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)
# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
ileum_taxa_tab,
ileum_metadata,
group, usage="ml_clr")
Removing 979 ASV(s)
Removing 68 ASV(s)
# fit the model
rf_model <- rf_binomial(filt_ileum_uni_data,
sample_method = "atypboot",
outcome="Group",
N=10,
reuse=FALSE,
file=model_name,
Q="Q1",
overfitting_check = TRUE)
# ROC curve
roc_c <- roc_curve(rf_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["rf_model"]][[model_name]] <- rf_model$model_summary
supplements_models[["roc_cs"]][["rf_model"]][[model_name]] <- rf_model$kfold_rocobjs
# see the results
rf_model$model_summary %>% t()
[,1]
mtry "325"
splitrule "gini"
min.node.size "5"
auc "1"
auc_optimism_corrected "0.5481132"
auc_optimism_corrected_CIL "0.4600454"
auc_optimism_corrected_CIU "0.5999195"
accuracy "1"
accuracy_optimism_corrected "0.6499321"
accuracy_optimism_corrected_CIL "0.6094981"
accuracy_optimism_corrected_CIU "0.7139803"
# render the ROC curve
roc_c
post_ltx vs healthy
# Random forest, ASV level: post_ltx vs healthy.
group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)
# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
ileum_taxa_tab,
ileum_metadata,
group, usage="ml_clr")
Removing 641 ASV(s)
Removing 104 ASV(s)
# fit the model
rf_model <- rf_binomial(filt_ileum_uni_data,
sample_method = "atypboot",
outcome="Group",
N=10,
reuse=FALSE,
file=model_name,
Q="Q1",
overfitting_check = TRUE)
# ROC curve
roc_c <- roc_curve(rf_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["rf_model"]][[model_name]] <- rf_model$model_summary
supplements_models[["roc_cs"]][["rf_model"]][[model_name]] <- rf_model$kfold_rocobjs
# see the results
rf_model$model_summary %>% t()
[,1]
mtry "253"
splitrule "gini"
min.node.size "2"
auc "1"
auc_optimism_corrected "0.4299593"
auc_optimism_corrected_CIL "0.374039"
auc_optimism_corrected_CIU "0.5151263"
accuracy "1"
accuracy_optimism_corrected "0.5824311"
accuracy_optimism_corrected_CIL "0.5220418"
accuracy_optimism_corrected_CIU "0.6492387"
# render the ROC curve
roc_c
# Genus-level aggregation for the RF models (recomputes the same objects as
# the earlier aggregations), then RF: pre_ltx vs healthy.
level="genus"
Aggregate taxa
genus_data <- aggregate_taxa(ileum_asv_tab,
ileum_taxa_tab,
taxonomic_level = level)
ileum_genus_tab <- genus_data[[1]]
ileum_genus_taxa_tab <- genus_data[[2]]
pre_ltx vs healthy
group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)
# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
ileum_genus_taxa_tab,
ileum_metadata,
group,
usage="ml_clr")
Removing 84 ASV(s)
Removing 10 ASV(s)
# fit the model
rf_model <- rf_binomial(filt_ileum_uni_data,
sample_method = "atypboot",
outcome="Group",
N=10,
reuse=FALSE,
file=model_name,
Q="Q1",
overfitting_check = TRUE)
# ROC curve
roc_c <- roc_curve(rf_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["rf_model"]][[model_name]] <- rf_model$model_summary
supplements_models[["roc_cs"]][["rf_model"]][[model_name]] <- rf_model$kfold_rocobjs
# see the results
rf_model$model_summary %>% t()
[,1]
mtry "11"
splitrule "gini"
min.node.size "2"
auc "1"
auc_optimism_corrected "0.5408093"
auc_optimism_corrected_CIL "0.3967742"
auc_optimism_corrected_CIU "0.6373702"
accuracy "1"
accuracy_optimism_corrected "0.5208998"
accuracy_optimism_corrected_CIL "0.3946011"
accuracy_optimism_corrected_CIU "0.6094602"
# render the ROC curve
roc_c
pre_ltx vs post_ltx
# Random forest, genus level: pre_ltx vs post_ltx.
group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)
# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
ileum_genus_taxa_tab,
ileum_metadata,
group,
usage="ml_clr")
Removing 46 ASV(s)
Removing 6 ASV(s)
# fit the model
rf_model <- rf_binomial(filt_ileum_uni_data,
sample_method = "atypboot",
outcome="Group",
N=10,
reuse=FALSE,
file=model_name,
Q="Q1",
overfitting_check = TRUE)
# ROC curve
roc_c <- roc_curve(rf_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["rf_model"]][[model_name]] <- rf_model$model_summary
supplements_models[["roc_cs"]][["rf_model"]][[model_name]] <- rf_model$kfold_rocobjs
# see the results
rf_model$model_summary %>% t()
[,1]
mtry "43"
splitrule "gini"
min.node.size "5"
auc "1"
auc_optimism_corrected "0.4757971"
auc_optimism_corrected_CIL "0.4098952"
auc_optimism_corrected_CIU "0.5839277"
accuracy "1"
accuracy_optimism_corrected "0.6103743"
accuracy_optimism_corrected_CIL "0.5583919"
accuracy_optimism_corrected_CIU "0.664693"
# render the ROC curve
roc_c
post_ltx vs healthy
# Random forest, genus level: post_ltx vs healthy.
group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)
# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
ileum_genus_taxa_tab,
ileum_metadata,
group,
usage="ml_clr")
Removing 45 ASV(s)
Removing 2 ASV(s)
# fit the model
rf_model <- rf_binomial(filt_ileum_uni_data,
sample_method = "atypboot",
outcome="Group",
N=10,
reuse=FALSE,
file=model_name,
Q="Q1",
overfitting_check = TRUE)
# ROC curve
roc_c <- roc_curve(rf_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["rf_model"]][[model_name]] <- rf_model$model_summary
supplements_models[["roc_cs"]][["rf_model"]][[model_name]] <- rf_model$kfold_rocobjs
# see the results
rf_model$model_summary %>% t()
[,1]
mtry "5"
splitrule "gini"
min.node.size "5"
auc "1"
auc_optimism_corrected "0.5222253"
auc_optimism_corrected_CIL "0.4556092"
auc_optimism_corrected_CIU "0.6308566"
accuracy "1"
accuracy_optimism_corrected "0.6078591"
accuracy_optimism_corrected_CIL "0.5208554"
accuracy_optimism_corrected_CIU "0.6677322"
# render the ROC curve
roc_c
# Gradient-boosting classifier (GBM), ASV level: pre_ltx vs healthy.
# The long "Iter TrainDeviance ..." tables that follow are gbm's training log.
model="gb"
level="ASV"
pre_ltx vs healthy
group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)
# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
ileum_taxa_tab,
ileum_metadata,
group, usage="ml_clr")
Removing 1598 ASV(s)
Removing 146 ASV(s)
# fit the model
gbm_model <- gbm_binomial(filt_ileum_uni_data,
sample_method = "atypboot",
outcome="Group",
N=10,
reuse=FALSE,
file=model_name,
Q="Q1",
overfitting_check = TRUE)
Warning: There were missing values in resampled performance measures.Warning: missing values found in aggregated results
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.3551 nan 0.1000 -0.0132
2 1.3011 nan 0.1000 0.0130
3 1.2537 nan 0.1000 -0.0026
4 1.2045 nan 0.1000 0.0069
5 1.1723 nan 0.1000 -0.0065
6 1.1371 nan 0.1000 -0.0003
7 1.1145 nan 0.1000 -0.0075
8 1.0956 nan 0.1000 -0.0127
9 1.0635 nan 0.1000 -0.0054
10 1.0276 nan 0.1000 0.0016
20 0.8144 nan 0.1000 -0.0056
40 0.5539 nan 0.1000 -0.0049
60 0.3736 nan 0.1000 -0.0028
80 0.2694 nan 0.1000 -0.0033
100 0.1938 nan 0.1000 -0.0009
Using 100 trees...
Warning: There were missing values in resampled performance measures.Warning: missing values found in aggregated results
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.3255 nan 0.1000 0.0189
2 1.2719 nan 0.1000 0.0151
3 1.2411 nan 0.1000 0.0112
4 1.1969 nan 0.1000 0.0098
5 1.1482 nan 0.1000 0.0143
6 1.1289 nan 0.1000 -0.0028
7 1.1071 nan 0.1000 0.0003
8 1.0828 nan 0.1000 -0.0009
9 1.0623 nan 0.1000 -0.0061
10 1.0522 nan 0.1000 -0.0011
20 0.7976 nan 0.1000 -0.0012
40 0.5404 nan 0.1000 0.0011
60 0.3704 nan 0.1000 -0.0016
80 0.2623 nan 0.1000 0.0015
100 0.2006 nan 0.1000 -0.0030
Using 100 trees...
Using 100 trees...
Warning: There were missing values in resampled performance measures.Warning: missing values found in aggregated results
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.3396 nan 0.1000 0.0039
2 1.2960 nan 0.1000 0.0088
3 1.2559 nan 0.1000 0.0140
4 1.2202 nan 0.1000 0.0053
5 1.2112 nan 0.1000 -0.0082
6 1.1687 nan 0.1000 0.0121
7 1.1523 nan 0.1000 -0.0019
8 1.1392 nan 0.1000 0.0016
9 1.1045 nan 0.1000 0.0059
10 1.0752 nan 0.1000 0.0074
20 0.9007 nan 0.1000 0.0011
40 0.6362 nan 0.1000 -0.0001
60 0.4747 nan 0.1000 0.0001
80 0.3589 nan 0.1000 0.0010
100 0.2571 nan 0.1000 -0.0014
120 0.1931 nan 0.1000 -0.0005
140 0.1503 nan 0.1000 0.0005
160 0.1122 nan 0.1000 -0.0007
180 0.0842 nan 0.1000 -0.0002
200 0.0641 nan 0.1000 -0.0011
Using 100 trees...
Using 100 trees...
Warning: There were missing values in resampled performance measures.Warning: missing values found in aggregated results
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.3511 nan 0.1000 0.0047
2 1.3214 nan 0.1000 0.0102
3 1.2996 nan 0.1000 0.0019
4 1.2732 nan 0.1000 0.0036
5 1.2532 nan 0.1000 0.0040
6 1.2427 nan 0.1000 -0.0072
7 1.2248 nan 0.1000 0.0026
8 1.2065 nan 0.1000 0.0024
9 1.1916 nan 0.1000 0.0024
10 1.1761 nan 0.1000 0.0010
20 1.0240 nan 0.1000 -0.0044
40 0.7996 nan 0.1000 -0.0009
60 0.6690 nan 0.1000 -0.0049
80 0.5493 nan 0.1000 -0.0015
100 0.4631 nan 0.1000 -0.0008
120 0.3865 nan 0.1000 -0.0012
140 0.3239 nan 0.1000 -0.0017
160 0.2701 nan 0.1000 -0.0007
180 0.2269 nan 0.1000 -0.0009
200 0.1910 nan 0.1000 -0.0006
Using 100 trees...
Using 100 trees...
Warning: There were missing values in resampled performance measures.Warning: missing values found in aggregated results
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.3620 nan 0.1000 -0.0022
2 1.3396 nan 0.1000 0.0008
3 1.3143 nan 0.1000 0.0034
4 1.2920 nan 0.1000 0.0048
5 1.2648 nan 0.1000 0.0029
6 1.2478 nan 0.1000 -0.0023
7 1.2307 nan 0.1000 0.0005
8 1.2104 nan 0.1000 -0.0018
9 1.1926 nan 0.1000 0.0006
10 1.1730 nan 0.1000 0.0004
20 1.0132 nan 0.1000 0.0046
40 0.7812 nan 0.1000 0.0014
60 0.6244 nan 0.1000 -0.0001
80 0.5104 nan 0.1000 -0.0017
100 0.4203 nan 0.1000 -0.0022
120 0.3488 nan 0.1000 -0.0021
140 0.2959 nan 0.1000 -0.0005
160 0.2499 nan 0.1000 0.0011
180 0.2085 nan 0.1000 -0.0013
200 0.1787 nan 0.1000 -0.0007
220 0.1507 nan 0.1000 -0.0002
240 0.1292 nan 0.1000 -0.0006
260 0.1091 nan 0.1000 -0.0002
280 0.0955 nan 0.1000 -0.0006
300 0.0798 nan 0.1000 -0.0003
320 0.0696 nan 0.1000 -0.0001
340 0.0595 nan 0.1000 -0.0002
360 0.0514 nan 0.1000 -0.0001
380 0.0433 nan 0.1000 -0.0003
400 0.0368 nan 0.1000 -0.0000
420 0.0318 nan 0.1000 -0.0002
440 0.0275 nan 0.1000 -0.0001
460 0.0235 nan 0.1000 -0.0002
480 0.0207 nan 0.1000 -0.0002
500 0.0174 nan 0.1000 -0.0000
Using 100 trees...
Using 100 trees...
Warning: There were missing values in resampled performance measures.Warning: missing values found in aggregated results
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.3666 nan 0.1000 -0.0113
2 1.3548 nan 0.1000 -0.0094
3 1.3404 nan 0.1000 -0.0046
4 1.3201 nan 0.1000 0.0008
5 1.3080 nan 0.1000 -0.0084
6 1.2823 nan 0.1000 0.0033
7 1.2624 nan 0.1000 -0.0038
8 1.2400 nan 0.1000 0.0033
9 1.2159 nan 0.1000 0.0071
10 1.1975 nan 0.1000 -0.0058
20 1.0535 nan 0.1000 0.0001
40 0.8332 nan 0.1000 0.0003
60 0.6808 nan 0.1000 -0.0023
80 0.5656 nan 0.1000 0.0003
100 0.4692 nan 0.1000 0.0007
120 0.4041 nan 0.1000 -0.0019
140 0.3406 nan 0.1000 -0.0012
160 0.3027 nan 0.1000 0.0001
180 0.2622 nan 0.1000 -0.0011
200 0.2241 nan 0.1000 0.0001
Using 100 trees...
Using 100 trees...
Warning: There were missing values in resampled performance measures.Warning: missing values found in aggregated results
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.3307 nan 0.1000 0.0076
2 1.2949 nan 0.1000 0.0081
3 1.2578 nan 0.1000 0.0081
4 1.2128 nan 0.1000 0.0096
5 1.1914 nan 0.1000 -0.0048
6 1.1633 nan 0.1000 -0.0060
7 1.1254 nan 0.1000 0.0081
8 1.1020 nan 0.1000 0.0012
9 1.0762 nan 0.1000 -0.0004
10 1.0322 nan 0.1000 0.0100
20 0.8296 nan 0.1000 -0.0017
40 0.5826 nan 0.1000 0.0039
60 0.4126 nan 0.1000 0.0006
80 0.3130 nan 0.1000 -0.0011
100 0.2309 nan 0.1000 -0.0026
120 0.1688 nan 0.1000 -0.0002
140 0.1221 nan 0.1000 -0.0002
160 0.0935 nan 0.1000 -0.0003
180 0.0723 nan 0.1000 -0.0001
200 0.0515 nan 0.1000 -0.0001
Using 100 trees...
Using 100 trees...
Warning: There were missing values in resampled performance measures.Warning: missing values found in aggregated results
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.3479 nan 0.1000 0.0088
2 1.3275 nan 0.1000 0.0012
3 1.3029 nan 0.1000 0.0025
4 1.2854 nan 0.1000 -0.0044
5 1.2671 nan 0.1000 0.0049
6 1.2507 nan 0.1000 0.0005
7 1.2265 nan 0.1000 0.0066
8 1.2013 nan 0.1000 0.0061
9 1.1892 nan 0.1000 -0.0043
10 1.1675 nan 0.1000 0.0043
20 0.9901 nan 0.1000 0.0012
40 0.7646 nan 0.1000 0.0011
60 0.6189 nan 0.1000 -0.0033
80 0.4998 nan 0.1000 -0.0005
100 0.4168 nan 0.1000 -0.0004
120 0.3374 nan 0.1000 -0.0014
140 0.2790 nan 0.1000 0.0006
160 0.2363 nan 0.1000 -0.0002
180 0.2003 nan 0.1000 -0.0011
200 0.1688 nan 0.1000 -0.0002
220 0.1442 nan 0.1000 -0.0001
240 0.1250 nan 0.1000 -0.0008
260 0.1071 nan 0.1000 -0.0000
280 0.0901 nan 0.1000 -0.0008
300 0.0756 nan 0.1000 -0.0002
320 0.0637 nan 0.1000 -0.0002
340 0.0544 nan 0.1000 -0.0000
360 0.0480 nan 0.1000 -0.0001
380 0.0409 nan 0.1000 0.0000
400 0.0350 nan 0.1000 -0.0001
420 0.0300 nan 0.1000 -0.0000
440 0.0259 nan 0.1000 -0.0000
460 0.0223 nan 0.1000 -0.0000
480 0.0188 nan 0.1000 -0.0001
500 0.0161 nan 0.1000 0.0000
Using 100 trees...
Using 100 trees...
Warning: There were missing values in resampled performance measures.Warning: missing values found in aggregated results
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.3566 nan 0.1000 0.0012
2 1.3223 nan 0.1000 0.0142
3 1.3093 nan 0.1000 -0.0114
4 1.2738 nan 0.1000 0.0082
5 1.2430 nan 0.1000 0.0075
6 1.2164 nan 0.1000 0.0068
7 1.1932 nan 0.1000 0.0068
8 1.1685 nan 0.1000 0.0053
9 1.1498 nan 0.1000 0.0033
10 1.1203 nan 0.1000 0.0096
20 0.9234 nan 0.1000 0.0028
40 0.7103 nan 0.1000 -0.0009
60 0.5610 nan 0.1000 -0.0005
80 0.4523 nan 0.1000 -0.0004
100 0.3668 nan 0.1000 -0.0013
120 0.3036 nan 0.1000 -0.0003
140 0.2557 nan 0.1000 -0.0034
160 0.2110 nan 0.1000 -0.0007
180 0.1757 nan 0.1000 -0.0005
200 0.1527 nan 0.1000 -0.0015
Using 100 trees...
Using 100 trees...
Warning: There were missing values in resampled performance measures.Warning: missing values found in aggregated results
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.3435 nan 0.1000 0.0111
2 1.3228 nan 0.1000 0.0005
3 1.2849 nan 0.1000 0.0160
4 1.2648 nan 0.1000 -0.0015
5 1.2454 nan 0.1000 0.0039
6 1.2255 nan 0.1000 0.0033
7 1.1929 nan 0.1000 0.0080
8 1.1792 nan 0.1000 -0.0028
9 1.1639 nan 0.1000 0.0017
10 1.1389 nan 0.1000 0.0067
20 0.9870 nan 0.1000 -0.0003
40 0.7627 nan 0.1000 0.0013
60 0.5959 nan 0.1000 0.0000
80 0.4714 nan 0.1000 0.0014
100 0.3805 nan 0.1000 -0.0012
Using 100 trees...
Using 100 trees...
Warning: There were missing values in resampled performance measures.Warning: missing values found in aggregated results
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.3327 nan 0.1000 0.0135
2 1.3136 nan 0.1000 0.0002
3 1.2921 nan 0.1000 0.0076
4 1.2566 nan 0.1000 0.0050
5 1.2240 nan 0.1000 0.0073
6 1.2145 nan 0.1000 -0.0117
7 1.1831 nan 0.1000 0.0061
8 1.1520 nan 0.1000 0.0111
9 1.1207 nan 0.1000 0.0068
10 1.0886 nan 0.1000 0.0034
20 0.8736 nan 0.1000 0.0051
40 0.6132 nan 0.1000 -0.0013
60 0.4463 nan 0.1000 0.0006
80 0.3251 nan 0.1000 -0.0008
100 0.2517 nan 0.1000 -0.0004
120 0.1987 nan 0.1000 -0.0010
140 0.1478 nan 0.1000 -0.0020
160 0.1099 nan 0.1000 0.0003
180 0.0828 nan 0.1000 -0.0009
200 0.0634 nan 0.1000 -0.0001
Using 100 trees...
Using 100 trees...
# ROC curve
roc_c <- roc_curve(gbm_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["gbm_model"]][[model_name]] <- gbm_model$model_summary
supplements_models[["roc_cs"]][["gbm_model"]][[model_name]] <- gbm_model$kfold_rocobjs
# see the results
gbm_model$model_summary %>% t()
[,1]
n.trees 100.0000000
interaction.depth 3.0000000
shrinkage 0.1000000
n.minobsinnode 10.0000000
auc 1.0000000
auc_optimism_corrected 0.4688164
auc_optimism_corrected_CIL 0.3601462
auc_optimism_corrected_CIU 0.5439275
accuracy 1.0000000
accuracy_optimism_corrected 0.4723681
accuracy_optimism_corrected_CIL 0.3898390
accuracy_optimism_corrected_CIU 0.5189217
roc_c
pre_ltx vs post_ltx
group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)
# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
ileum_taxa_tab,
ileum_metadata,
group, usage="ml_clr")
Removing 979 ASV(s)
Removing 68 ASV(s)
# fit the model
gbm_model <- gbm_binomial(filt_ileum_uni_data,
sample_method = "atypboot",
outcome="Group",
N=10,
reuse=FALSE,
file=model_name,
Q="Q1",
overfitting_check = TRUE)
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.2588 nan 0.1000 -0.0058
2 1.2440 nan 0.1000 -0.0029
3 1.2238 nan 0.1000 0.0033
4 1.2059 nan 0.1000 -0.0053
5 1.1907 nan 0.1000 -0.0005
6 1.1689 nan 0.1000 0.0031
7 1.1495 nan 0.1000 0.0006
8 1.1443 nan 0.1000 -0.0085
9 1.1379 nan 0.1000 -0.0027
10 1.1248 nan 0.1000 -0.0017
20 1.0163 nan 0.1000 -0.0041
40 0.8553 nan 0.1000 -0.0043
60 0.7387 nan 0.1000 -0.0055
80 0.6414 nan 0.1000 -0.0056
100 0.5703 nan 0.1000 -0.0026
Using 100 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.2861 nan 0.1000 -0.0024
2 1.2544 nan 0.1000 0.0133
3 1.2268 nan 0.1000 0.0101
4 1.1985 nan 0.1000 0.0102
5 1.1794 nan 0.1000 0.0081
6 1.1575 nan 0.1000 0.0003
7 1.1335 nan 0.1000 0.0054
8 1.1105 nan 0.1000 0.0041
9 1.0926 nan 0.1000 -0.0022
10 1.0782 nan 0.1000 0.0049
20 0.9432 nan 0.1000 0.0033
40 0.7667 nan 0.1000 0.0014
60 0.6432 nan 0.1000 0.0003
80 0.5633 nan 0.1000 -0.0030
100 0.4913 nan 0.1000 -0.0017
120 0.4282 nan 0.1000 -0.0013
140 0.3717 nan 0.1000 0.0002
160 0.3296 nan 0.1000 -0.0006
180 0.2908 nan 0.1000 -0.0016
200 0.2635 nan 0.1000 -0.0013
220 0.2318 nan 0.1000 -0.0005
240 0.2077 nan 0.1000 0.0001
260 0.1841 nan 0.1000 -0.0011
280 0.1630 nan 0.1000 -0.0002
300 0.1457 nan 0.1000 -0.0005
320 0.1299 nan 0.1000 -0.0006
340 0.1177 nan 0.1000 -0.0008
360 0.1054 nan 0.1000 -0.0002
380 0.0944 nan 0.1000 -0.0003
400 0.0855 nan 0.1000 -0.0004
420 0.0770 nan 0.1000 -0.0005
440 0.0700 nan 0.1000 -0.0000
460 0.0641 nan 0.1000 -0.0004
480 0.0577 nan 0.1000 -0.0003
500 0.0526 nan 0.1000 -0.0003
Using 100 trees...
Using 100 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.1447 nan 0.1000 0.0176
2 1.1124 nan 0.1000 0.0069
3 1.0851 nan 0.1000 0.0063
4 1.0556 nan 0.1000 0.0049
5 1.0135 nan 0.1000 0.0087
6 0.9883 nan 0.1000 0.0062
7 0.9607 nan 0.1000 0.0072
8 0.9285 nan 0.1000 0.0078
9 0.9008 nan 0.1000 0.0003
10 0.8826 nan 0.1000 -0.0023
20 0.6920 nan 0.1000 0.0038
40 0.4008 nan 0.1000 -0.0005
60 0.2868 nan 0.1000 0.0010
80 0.2022 nan 0.1000 -0.0018
100 0.1361 nan 0.1000 -0.0007
120 0.0993 nan 0.1000 -0.0011
140 0.0707 nan 0.1000 -0.0005
160 0.0494 nan 0.1000 -0.0000
180 0.0369 nan 0.1000 -0.0001
200 0.0271 nan 0.1000 -0.0002
Using 100 trees...
Using 100 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.2158 nan 0.1000 0.0013
2 1.1941 nan 0.1000 0.0026
3 1.1765 nan 0.1000 0.0028
4 1.1548 nan 0.1000 -0.0027
5 1.1436 nan 0.1000 -0.0048
6 1.1264 nan 0.1000 0.0039
7 1.1122 nan 0.1000 0.0018
8 1.1004 nan 0.1000 -0.0024
9 1.0859 nan 0.1000 0.0010
10 1.0636 nan 0.1000 0.0083
20 0.9415 nan 0.1000 -0.0003
40 0.7798 nan 0.1000 -0.0014
60 0.6469 nan 0.1000 -0.0027
80 0.5538 nan 0.1000 -0.0004
100 0.4733 nan 0.1000 -0.0014
120 0.4109 nan 0.1000 -0.0010
140 0.3571 nan 0.1000 -0.0002
160 0.3113 nan 0.1000 -0.0017
180 0.2713 nan 0.1000 -0.0008
200 0.2393 nan 0.1000 -0.0003
220 0.2112 nan 0.1000 -0.0008
240 0.1843 nan 0.1000 -0.0005
260 0.1642 nan 0.1000 -0.0005
280 0.1465 nan 0.1000 -0.0006
300 0.1301 nan 0.1000 -0.0005
320 0.1149 nan 0.1000 -0.0004
340 0.1015 nan 0.1000 -0.0005
360 0.0903 nan 0.1000 0.0000
380 0.0808 nan 0.1000 -0.0003
400 0.0710 nan 0.1000 -0.0002
420 0.0643 nan 0.1000 -0.0005
440 0.0575 nan 0.1000 -0.0000
460 0.0522 nan 0.1000 -0.0003
480 0.0460 nan 0.1000 -0.0001
500 0.0409 nan 0.1000 -0.0002
Using 100 trees...
Using 100 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.2296 nan 0.1000 0.0102
2 1.1984 nan 0.1000 0.0092
3 1.1692 nan 0.1000 0.0072
4 1.1322 nan 0.1000 0.0033
5 1.0915 nan 0.1000 0.0034
6 1.0549 nan 0.1000 0.0040
7 1.0289 nan 0.1000 0.0059
8 1.0021 nan 0.1000 0.0063
9 0.9727 nan 0.1000 0.0012
10 0.9465 nan 0.1000 0.0029
20 0.7228 nan 0.1000 0.0015
40 0.4507 nan 0.1000 0.0014
60 0.3080 nan 0.1000 -0.0024
80 0.2148 nan 0.1000 -0.0009
100 0.1445 nan 0.1000 -0.0004
120 0.1026 nan 0.1000 -0.0004
140 0.0713 nan 0.1000 0.0005
160 0.0514 nan 0.1000 -0.0000
180 0.0383 nan 0.1000 -0.0003
200 0.0268 nan 0.1000 -0.0001
Using 100 trees...
Using 100 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.2555 nan 0.1000 0.0103
2 1.2048 nan 0.1000 0.0186
3 1.1775 nan 0.1000 0.0031
4 1.1456 nan 0.1000 -0.0011
5 1.1171 nan 0.1000 0.0030
6 1.0912 nan 0.1000 0.0024
7 1.0583 nan 0.1000 0.0037
8 1.0325 nan 0.1000 0.0062
9 0.9916 nan 0.1000 0.0085
10 0.9646 nan 0.1000 0.0009
20 0.7156 nan 0.1000 0.0025
40 0.4579 nan 0.1000 -0.0008
60 0.2990 nan 0.1000 -0.0022
80 0.1972 nan 0.1000 -0.0010
100 0.1320 nan 0.1000 -0.0008
Using 100 trees...
Using 100 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.2948 nan 0.1000 0.0092
2 1.2556 nan 0.1000 0.0061
3 1.2332 nan 0.1000 0.0063
4 1.2141 nan 0.1000 0.0028
5 1.1918 nan 0.1000 0.0059
6 1.1657 nan 0.1000 0.0082
7 1.1493 nan 0.1000 0.0054
8 1.1236 nan 0.1000 0.0053
9 1.1121 nan 0.1000 -0.0018
10 1.1012 nan 0.1000 0.0023
20 0.9578 nan 0.1000 -0.0053
40 0.7255 nan 0.1000 0.0020
60 0.5885 nan 0.1000 -0.0019
80 0.4853 nan 0.1000 -0.0009
100 0.3833 nan 0.1000 0.0001
120 0.3160 nan 0.1000 0.0000
140 0.2586 nan 0.1000 -0.0008
160 0.2081 nan 0.1000 -0.0001
180 0.1658 nan 0.1000 -0.0003
200 0.1392 nan 0.1000 -0.0006
220 0.1137 nan 0.1000 -0.0015
240 0.0960 nan 0.1000 -0.0003
260 0.0779 nan 0.1000 0.0001
280 0.0654 nan 0.1000 -0.0006
300 0.0535 nan 0.1000 -0.0001
320 0.0454 nan 0.1000 -0.0004
340 0.0378 nan 0.1000 -0.0002
360 0.0310 nan 0.1000 -0.0000
380 0.0261 nan 0.1000 0.0000
400 0.0218 nan 0.1000 -0.0001
420 0.0177 nan 0.1000 -0.0000
440 0.0149 nan 0.1000 -0.0000
460 0.0125 nan 0.1000 -0.0000
480 0.0108 nan 0.1000 -0.0000
500 0.0090 nan 0.1000 -0.0000
Using 100 trees...
Using 100 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.1939 nan 0.1000 0.0081
2 1.1717 nan 0.1000 -0.0043
3 1.1404 nan 0.1000 0.0066
4 1.1242 nan 0.1000 0.0002
5 1.0899 nan 0.1000 0.0084
6 1.0649 nan 0.1000 0.0017
7 1.0340 nan 0.1000 0.0075
8 1.0254 nan 0.1000 -0.0009
9 1.0018 nan 0.1000 0.0051
10 0.9895 nan 0.1000 0.0007
20 0.8351 nan 0.1000 -0.0014
40 0.6068 nan 0.1000 0.0023
60 0.4653 nan 0.1000 0.0002
80 0.3734 nan 0.1000 -0.0005
100 0.3110 nan 0.1000 -0.0007
120 0.2391 nan 0.1000 -0.0003
140 0.1883 nan 0.1000 -0.0004
160 0.1535 nan 0.1000 -0.0001
180 0.1251 nan 0.1000 -0.0005
200 0.0972 nan 0.1000 -0.0010
220 0.0834 nan 0.1000 -0.0003
240 0.0637 nan 0.1000 -0.0001
260 0.0521 nan 0.1000 -0.0004
280 0.0403 nan 0.1000 -0.0001
300 0.0317 nan 0.1000 -0.0003
320 0.0259 nan 0.1000 -0.0001
340 0.0210 nan 0.1000 -0.0000
360 0.0174 nan 0.1000 -0.0001
380 0.0144 nan 0.1000 -0.0001
400 0.0121 nan 0.1000 -0.0001
420 0.0097 nan 0.1000 -0.0001
440 0.0080 nan 0.1000 -0.0001
460 0.0066 nan 0.1000 -0.0000
480 0.0054 nan 0.1000 -0.0001
500 0.0043 nan 0.1000 -0.0000
Using 100 trees...
Using 100 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.2386 nan 0.1000 -0.0003
2 1.2143 nan 0.1000 0.0061
3 1.1998 nan 0.1000 0.0024
4 1.1787 nan 0.1000 0.0050
5 1.1571 nan 0.1000 0.0045
6 1.1319 nan 0.1000 0.0101
7 1.1204 nan 0.1000 0.0007
8 1.1027 nan 0.1000 0.0043
9 1.0943 nan 0.1000 -0.0036
10 1.0698 nan 0.1000 0.0095
20 0.9326 nan 0.1000 0.0014
40 0.7408 nan 0.1000 -0.0003
60 0.5997 nan 0.1000 -0.0014
80 0.5007 nan 0.1000 -0.0005
100 0.4244 nan 0.1000 -0.0013
120 0.3661 nan 0.1000 -0.0025
140 0.3114 nan 0.1000 -0.0004
160 0.2622 nan 0.1000 0.0001
180 0.2265 nan 0.1000 -0.0007
200 0.1981 nan 0.1000 -0.0003
Using 100 trees...
Using 100 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.1688 nan 0.1000 0.0319
2 1.1370 nan 0.1000 0.0067
3 1.1047 nan 0.1000 0.0019
4 1.0768 nan 0.1000 -0.0005
5 1.0322 nan 0.1000 0.0123
6 0.9844 nan 0.1000 0.0142
7 0.9361 nan 0.1000 0.0119
8 0.9037 nan 0.1000 0.0052
9 0.8697 nan 0.1000 0.0088
10 0.8384 nan 0.1000 0.0059
20 0.6652 nan 0.1000 0.0026
40 0.4049 nan 0.1000 0.0003
60 0.2511 nan 0.1000 -0.0014
80 0.1679 nan 0.1000 -0.0003
100 0.1252 nan 0.1000 -0.0008
Using 100 trees...
Using 100 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.1830 nan 0.1000 0.0072
2 1.1180 nan 0.1000 0.0180
3 1.0665 nan 0.1000 0.0006
4 1.0200 nan 0.1000 0.0021
5 0.9613 nan 0.1000 0.0117
6 0.9148 nan 0.1000 0.0122
7 0.8732 nan 0.1000 0.0073
8 0.8374 nan 0.1000 0.0028
9 0.8178 nan 0.1000 -0.0055
10 0.7776 nan 0.1000 0.0087
20 0.5142 nan 0.1000 0.0011
40 0.2569 nan 0.1000 0.0006
60 0.1465 nan 0.1000 -0.0012
80 0.0861 nan 0.1000 -0.0006
100 0.0533 nan 0.1000 -0.0004
120 0.0322 nan 0.1000 -0.0002
140 0.0192 nan 0.1000 -0.0001
160 0.0112 nan 0.1000 -0.0001
180 0.0070 nan 0.1000 -0.0000
200 0.0043 nan 0.1000 0.0000
220 0.0027 nan 0.1000 -0.0000
240 0.0017 nan 0.1000 -0.0000
260 0.0010 nan 0.1000 0.0000
280 0.0006 nan 0.1000 -0.0000
300 0.0004 nan 0.1000 -0.0000
320 0.0003 nan 0.1000 -0.0000
340 0.0002 nan 0.1000 -0.0000
360 0.0001 nan 0.1000 0.0000
380 0.0001 nan 0.1000 -0.0000
400 0.0000 nan 0.1000 -0.0000
420 0.0000 nan 0.1000 -0.0000
440 0.0000 nan 0.1000 0.0000
460 0.0000 nan 0.1000 0.0000
480 0.0000 nan 0.1000 -0.0000
500 0.0000 nan 0.1000 -0.0000
Using 100 trees...
Using 100 trees...
# ROC curve
roc_c <- roc_curve(gbm_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["gbm_model"]][[model_name]] <- gbm_model$model_summary
supplements_models[["roc_cs"]][["gbm_model"]][[model_name]] <- gbm_model$kfold_rocobjs
# see the results
gbm_model$model_summary %>% t()
[,1]
n.trees 100.0000000
interaction.depth 5.0000000
shrinkage 0.1000000
n.minobsinnode 30.0000000
auc 0.9924431
auc_optimism_corrected 0.5406939
auc_optimism_corrected_CIL 0.4727791
auc_optimism_corrected_CIU 0.6574737
accuracy 0.9567308
accuracy_optimism_corrected 0.6216785
accuracy_optimism_corrected_CIL 0.5529762
accuracy_optimism_corrected_CIU 0.6873889
roc_c
post_ltx vs healthy
group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)
# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
ileum_taxa_tab,
ileum_metadata,
group, usage="ml_clr")
Removing 641 ASV(s)
Removing 104 ASV(s)
# fit the model
gbm_model <- gbm_binomial(filt_ileum_uni_data,
sample_method = "atypboot",
outcome="Group",
N=10,
reuse=FALSE,
file=model_name,
Q="Q1",
overfitting_check = TRUE)
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.2581 nan 0.1000 -0.0001
2 1.2346 nan 0.1000 -0.0028
3 1.2082 nan 0.1000 -0.0026
4 1.1824 nan 0.1000 -0.0070
5 1.1575 nan 0.1000 -0.0028
6 1.1322 nan 0.1000 -0.0006
7 1.1107 nan 0.1000 -0.0048
8 1.0967 nan 0.1000 -0.0138
9 1.0683 nan 0.1000 0.0015
10 1.0498 nan 0.1000 -0.0057
20 0.8683 nan 0.1000 -0.0011
40 0.6059 nan 0.1000 -0.0058
60 0.4462 nan 0.1000 0.0009
80 0.3300 nan 0.1000 -0.0018
100 0.2482 nan 0.1000 -0.0006
Using 100 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.2490 nan 0.1000 0.0172
2 1.2057 nan 0.1000 0.0067
3 1.1702 nan 0.1000 0.0053
4 1.1171 nan 0.1000 0.0126
5 1.0914 nan 0.1000 -0.0043
6 1.0514 nan 0.1000 0.0083
7 1.0186 nan 0.1000 0.0045
8 0.9884 nan 0.1000 0.0048
9 0.9537 nan 0.1000 0.0080
10 0.9206 nan 0.1000 0.0087
20 0.6551 nan 0.1000 0.0071
40 0.3888 nan 0.1000 -0.0012
60 0.2474 nan 0.1000 -0.0004
80 0.1621 nan 0.1000 -0.0001
100 0.1074 nan 0.1000 -0.0002
120 0.0733 nan 0.1000 -0.0006
140 0.0489 nan 0.1000 0.0001
160 0.0327 nan 0.1000 -0.0002
180 0.0227 nan 0.1000 -0.0000
200 0.0155 nan 0.1000 -0.0000
Using 100 trees...
Using 100 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.1972 nan 0.1000 0.0119
2 1.1341 nan 0.1000 0.0163
3 1.0737 nan 0.1000 0.0152
4 1.0279 nan 0.1000 0.0073
5 0.9777 nan 0.1000 0.0163
6 0.9318 nan 0.1000 0.0076
7 0.8921 nan 0.1000 0.0085
8 0.8422 nan 0.1000 0.0135
9 0.8022 nan 0.1000 0.0078
10 0.7616 nan 0.1000 0.0104
20 0.5107 nan 0.1000 -0.0022
40 0.2621 nan 0.1000 -0.0001
60 0.1408 nan 0.1000 0.0001
80 0.0781 nan 0.1000 0.0002
100 0.0460 nan 0.1000 -0.0006
120 0.0271 nan 0.1000 -0.0005
140 0.0155 nan 0.1000 -0.0003
160 0.0099 nan 0.1000 -0.0002
180 0.0062 nan 0.1000 -0.0000
200 0.0037 nan 0.1000 -0.0001
220 0.0024 nan 0.1000 -0.0001
240 0.0016 nan 0.1000 -0.0000
260 0.0010 nan 0.1000 -0.0000
280 0.0007 nan 0.1000 -0.0000
300 0.0006 nan 0.1000 0.0000
320 0.0003 nan 0.1000 -0.0000
340 0.0002 nan 0.1000 -0.0000
360 0.0001 nan 0.1000 -0.0000
380 0.0001 nan 0.1000 0.0000
400 0.0001 nan 0.1000 -0.0000
420 0.0001 nan 0.1000 0.0000
440 0.0000 nan 0.1000 -0.0000
460 0.0000 nan 0.1000 0.0000
480 0.0000 nan 0.1000 -0.0000
500 0.0000 nan 0.1000 -0.0000
Using 100 trees...
Using 100 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.2924 nan 0.1000 0.0057
2 1.2825 nan 0.1000 -0.0021
3 1.2619 nan 0.1000 0.0044
4 1.2437 nan 0.1000 0.0046
5 1.2206 nan 0.1000 0.0036
6 1.2120 nan 0.1000 -0.0048
7 1.2018 nan 0.1000 -0.0007
8 1.1817 nan 0.1000 0.0052
9 1.1736 nan 0.1000 -0.0034
10 1.1638 nan 0.1000 -0.0019
20 1.0516 nan 0.1000 0.0006
40 0.8866 nan 0.1000 -0.0008
60 0.7605 nan 0.1000 0.0006
80 0.6562 nan 0.1000 -0.0005
100 0.5727 nan 0.1000 -0.0012
120 0.5016 nan 0.1000 0.0001
140 0.4462 nan 0.1000 -0.0003
160 0.3960 nan 0.1000 -0.0006
180 0.3505 nan 0.1000 -0.0001
200 0.3141 nan 0.1000 -0.0011
220 0.2812 nan 0.1000 -0.0015
240 0.2494 nan 0.1000 -0.0005
260 0.2253 nan 0.1000 -0.0015
280 0.2023 nan 0.1000 -0.0007
300 0.1828 nan 0.1000 -0.0006
320 0.1658 nan 0.1000 -0.0002
340 0.1481 nan 0.1000 -0.0003
360 0.1309 nan 0.1000 0.0002
380 0.1169 nan 0.1000 -0.0006
400 0.1053 nan 0.1000 -0.0000
420 0.0949 nan 0.1000 -0.0002
440 0.0862 nan 0.1000 -0.0003
460 0.0781 nan 0.1000 -0.0000
480 0.0710 nan 0.1000 -0.0000
500 0.0656 nan 0.1000 -0.0001
Using 100 trees...
Using 100 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.2567 nan 0.1000 0.0031
2 1.2334 nan 0.1000 0.0068
3 1.2161 nan 0.1000 -0.0014
4 1.1892 nan 0.1000 0.0076
5 1.1691 nan 0.1000 0.0062
6 1.1602 nan 0.1000 -0.0029
7 1.1414 nan 0.1000 0.0027
8 1.1300 nan 0.1000 -0.0025
9 1.1193 nan 0.1000 -0.0031
10 1.1093 nan 0.1000 -0.0022
20 0.9919 nan 0.1000 0.0013
40 0.8018 nan 0.1000 -0.0046
60 0.6759 nan 0.1000 -0.0003
80 0.5683 nan 0.1000 -0.0014
100 0.4893 nan 0.1000 0.0007
120 0.4201 nan 0.1000 0.0010
140 0.3657 nan 0.1000 -0.0008
160 0.3182 nan 0.1000 0.0008
180 0.2816 nan 0.1000 -0.0006
200 0.2453 nan 0.1000 -0.0003
Using 100 trees...
Using 100 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.3135 nan 0.1000 -0.0040
2 1.2960 nan 0.1000 0.0012
3 1.2793 nan 0.1000 0.0032
4 1.2649 nan 0.1000 0.0018
5 1.2436 nan 0.1000 0.0063
6 1.2317 nan 0.1000 -0.0054
7 1.2158 nan 0.1000 0.0037
8 1.1978 nan 0.1000 0.0021
9 1.1814 nan 0.1000 0.0031
10 1.1647 nan 0.1000 0.0027
20 1.0362 nan 0.1000 0.0024
40 0.8549 nan 0.1000 -0.0021
60 0.7258 nan 0.1000 -0.0029
80 0.6269 nan 0.1000 -0.0039
100 0.5463 nan 0.1000 -0.0013
120 0.4788 nan 0.1000 -0.0002
140 0.4229 nan 0.1000 -0.0008
160 0.3765 nan 0.1000 -0.0021
180 0.3339 nan 0.1000 0.0000
200 0.2990 nan 0.1000 -0.0017
220 0.2639 nan 0.1000 0.0004
240 0.2318 nan 0.1000 0.0004
260 0.2089 nan 0.1000 -0.0001
280 0.1857 nan 0.1000 -0.0008
300 0.1639 nan 0.1000 -0.0007
320 0.1456 nan 0.1000 0.0000
340 0.1310 nan 0.1000 -0.0002
360 0.1152 nan 0.1000 -0.0002
380 0.1037 nan 0.1000 -0.0003
400 0.0932 nan 0.1000 -0.0003
420 0.0830 nan 0.1000 -0.0001
440 0.0743 nan 0.1000 -0.0001
460 0.0668 nan 0.1000 -0.0002
480 0.0606 nan 0.1000 -0.0001
500 0.0541 nan 0.1000 -0.0001
Using 100 trees...
Using 100 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.3075 nan 0.1000 0.0123
2 1.2430 nan 0.1000 0.0180
3 1.2133 nan 0.1000 -0.0068
4 1.1627 nan 0.1000 0.0135
5 1.1252 nan 0.1000 0.0076
6 1.0842 nan 0.1000 0.0071
7 1.0450 nan 0.1000 0.0064
8 1.0112 nan 0.1000 0.0082
9 0.9857 nan 0.1000 -0.0024
10 0.9611 nan 0.1000 0.0009
20 0.7119 nan 0.1000 0.0040
40 0.4285 nan 0.1000 -0.0000
60 0.2691 nan 0.1000 0.0005
80 0.1792 nan 0.1000 0.0001
100 0.1189 nan 0.1000 -0.0003
120 0.0804 nan 0.1000 -0.0006
140 0.0537 nan 0.1000 -0.0004
160 0.0360 nan 0.1000 0.0000
180 0.0255 nan 0.1000 -0.0002
200 0.0177 nan 0.1000 -0.0000
220 0.0122 nan 0.1000 -0.0000
240 0.0084 nan 0.1000 -0.0000
260 0.0057 nan 0.1000 -0.0000
280 0.0040 nan 0.1000 -0.0000
300 0.0028 nan 0.1000 -0.0000
320 0.0020 nan 0.1000 -0.0000
340 0.0014 nan 0.1000 -0.0000
360 0.0010 nan 0.1000 -0.0000
380 0.0007 nan 0.1000 -0.0000
400 0.0005 nan 0.1000 0.0000
420 0.0003 nan 0.1000 0.0000
440 0.0002 nan 0.1000 -0.0000
460 0.0002 nan 0.1000 -0.0000
480 0.0001 nan 0.1000 -0.0000
500 0.0001 nan 0.1000 0.0000
Using 100 trees...
Using 100 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.2683 nan 0.1000 0.0063
2 1.2420 nan 0.1000 -0.0053
3 1.2043 nan 0.1000 0.0086
4 1.1636 nan 0.1000 0.0068
5 1.1244 nan 0.1000 0.0063
6 1.0971 nan 0.1000 0.0062
7 1.0650 nan 0.1000 0.0049
8 1.0472 nan 0.1000 -0.0074
9 1.0215 nan 0.1000 0.0059
10 1.0042 nan 0.1000 -0.0010
20 0.7866 nan 0.1000 0.0014
40 0.5172 nan 0.1000 0.0032
60 0.3541 nan 0.1000 -0.0024
80 0.2434 nan 0.1000 -0.0009
100 0.1758 nan 0.1000 -0.0016
120 0.1262 nan 0.1000 -0.0009
140 0.0915 nan 0.1000 -0.0004
160 0.0652 nan 0.1000 -0.0005
180 0.0475 nan 0.1000 -0.0002
200 0.0349 nan 0.1000 -0.0000
Using 100 trees...
Using 100 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.2510 nan 0.1000 -0.0002
2 1.2277 nan 0.1000 0.0078
3 1.2171 nan 0.1000 -0.0028
4 1.2019 nan 0.1000 0.0016
5 1.1828 nan 0.1000 0.0037
6 1.1611 nan 0.1000 0.0040
7 1.1442 nan 0.1000 0.0052
8 1.1266 nan 0.1000 0.0026
9 1.1118 nan 0.1000 0.0043
10 1.1001 nan 0.1000 0.0023
20 0.9939 nan 0.1000 0.0021
40 0.8335 nan 0.1000 -0.0026
60 0.7174 nan 0.1000 0.0008
80 0.6198 nan 0.1000 0.0008
100 0.5445 nan 0.1000 -0.0025
120 0.4695 nan 0.1000 0.0006
140 0.4193 nan 0.1000 -0.0024
160 0.3731 nan 0.1000 -0.0022
180 0.3291 nan 0.1000 -0.0013
200 0.2947 nan 0.1000 -0.0019
Using 100 trees...
Using 100 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.2653 nan 0.1000 0.0168
2 1.2329 nan 0.1000 0.0114
3 1.2066 nan 0.1000 -0.0020
4 1.1751 nan 0.1000 0.0063
5 1.1488 nan 0.1000 0.0074
6 1.1278 nan 0.1000 0.0014
7 1.0979 nan 0.1000 0.0103
8 1.0810 nan 0.1000 0.0001
9 1.0561 nan 0.1000 0.0039
10 1.0496 nan 0.1000 -0.0012
20 0.8998 nan 0.1000 -0.0011
40 0.6745 nan 0.1000 0.0010
60 0.5035 nan 0.1000 -0.0007
80 0.3951 nan 0.1000 0.0010
100 0.3220 nan 0.1000 -0.0014
120 0.2631 nan 0.1000 -0.0007
140 0.2117 nan 0.1000 -0.0001
160 0.1746 nan 0.1000 -0.0005
180 0.1438 nan 0.1000 -0.0012
200 0.1164 nan 0.1000 -0.0007
Using 100 trees...
Using 100 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.1371 nan 0.1000 0.0137
2 1.1171 nan 0.1000 0.0028
3 1.1054 nan 0.1000 -0.0048
4 1.0880 nan 0.1000 -0.0010
5 1.0763 nan 0.1000 -0.0007
6 1.0518 nan 0.1000 0.0019
7 1.0366 nan 0.1000 -0.0003
8 1.0198 nan 0.1000 0.0030
9 1.0024 nan 0.1000 0.0038
10 0.9892 nan 0.1000 -0.0011
20 0.8553 nan 0.1000 -0.0006
40 0.6747 nan 0.1000 0.0019
60 0.5484 nan 0.1000 0.0007
80 0.4696 nan 0.1000 -0.0003
100 0.3950 nan 0.1000 0.0001
120 0.3356 nan 0.1000 -0.0016
140 0.2901 nan 0.1000 -0.0002
160 0.2494 nan 0.1000 -0.0013
180 0.2151 nan 0.1000 -0.0002
200 0.1873 nan 0.1000 0.0001
220 0.1645 nan 0.1000 -0.0008
240 0.1431 nan 0.1000 0.0001
260 0.1239 nan 0.1000 -0.0001
280 0.1082 nan 0.1000 0.0000
300 0.0962 nan 0.1000 -0.0003
320 0.0855 nan 0.1000 -0.0001
340 0.0753 nan 0.1000 -0.0002
360 0.0673 nan 0.1000 -0.0002
380 0.0597 nan 0.1000 -0.0001
400 0.0522 nan 0.1000 -0.0001
420 0.0466 nan 0.1000 -0.0002
440 0.0409 nan 0.1000 -0.0000
460 0.0363 nan 0.1000 -0.0000
480 0.0320 nan 0.1000 -0.0001
500 0.0287 nan 0.1000 -0.0001
Using 100 trees...
Using 100 trees...
# ROC curve
roc_c <- roc_curve(gbm_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["gbm_model"]][[model_name]] <- gbm_model$model_summary
supplements_models[["roc_cs"]][["gbm_model"]][[model_name]] <- gbm_model$kfold_rocobjs
# see the results
gbm_model$model_summary %>% t()
[,1]
n.trees 100.0000000
interaction.depth 3.0000000
shrinkage 0.1000000
n.minobsinnode 10.0000000
auc 1.0000000
auc_optimism_corrected 0.4883825
auc_optimism_corrected_CIL 0.4050371
auc_optimism_corrected_CIU 0.5638409
accuracy 1.0000000
accuracy_optimism_corrected 0.5539314
accuracy_optimism_corrected_CIL 0.4967160
accuracy_optimism_corrected_CIU 0.6098763
roc_c
level="genus"
Aggregate taxa
genus_data <- aggregate_taxa(ileum_asv_tab,
ileum_taxa_tab,
taxonomic_level = level)
ileum_genus_tab <- genus_data[[1]]
ileum_genus_taxa_tab <- genus_data[[2]]
pre_ltx vs healthy
group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)
# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
ileum_genus_taxa_tab,
ileum_metadata,
group,
usage="ml_clr")
Removing 84 ASV(s)
Removing 10 ASV(s)
# fit the model
gbm_model <- gbm_binomial(filt_ileum_uni_data,
sample_method = "atypboot",
outcome="Group",
N=10,
reuse=FALSE,
file=model_name,
Q="Q1",
overfitting_check = TRUE)
Warning: There were missing values in resampled performance measures.Warning: missing values found in aggregated results
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.3183 nan 0.1000 0.0040
2 1.2770 nan 0.1000 -0.0038
3 1.2238 nan 0.1000 0.0087
4 1.1830 nan 0.1000 -0.0053
5 1.1544 nan 0.1000 -0.0104
6 1.1189 nan 0.1000 -0.0037
7 1.0779 nan 0.1000 0.0021
8 1.0531 nan 0.1000 -0.0068
9 1.0280 nan 0.1000 -0.0071
10 0.9946 nan 0.1000 -0.0055
20 0.7682 nan 0.1000 -0.0033
40 0.4929 nan 0.1000 -0.0023
60 0.2969 nan 0.1000 -0.0021
80 0.1988 nan 0.1000 -0.0010
100 0.1343 nan 0.1000 -0.0014
Using 100 trees...
Warning: There were missing values in resampled performance measures.Warning: missing values found in aggregated results
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.3051 nan 0.1000 0.0279
2 1.2148 nan 0.1000 0.0355
3 1.1648 nan 0.1000 0.0060
4 1.0901 nan 0.1000 0.0170
5 1.0334 nan 0.1000 0.0167
6 0.9953 nan 0.1000 0.0033
7 0.9420 nan 0.1000 0.0177
8 0.9059 nan 0.1000 0.0029
9 0.8688 nan 0.1000 0.0092
10 0.8397 nan 0.1000 0.0011
20 0.5775 nan 0.1000 0.0028
40 0.2913 nan 0.1000 -0.0019
60 0.1490 nan 0.1000 -0.0014
80 0.0770 nan 0.1000 -0.0003
100 0.0404 nan 0.1000 -0.0006
120 0.0235 nan 0.1000 0.0002
140 0.0138 nan 0.1000 -0.0002
160 0.0084 nan 0.1000 -0.0000
180 0.0047 nan 0.1000 0.0000
200 0.0029 nan 0.1000 -0.0000
220 0.0017 nan 0.1000 -0.0000
240 0.0012 nan 0.1000 -0.0000
260 0.0008 nan 0.1000 0.0000
280 0.0004 nan 0.1000 -0.0000
300 0.0002 nan 0.1000 -0.0000
320 0.0001 nan 0.1000 0.0000
340 0.0001 nan 0.1000 -0.0000
360 0.0001 nan 0.1000 -0.0000
380 0.0000 nan 0.1000 -0.0000
400 0.0000 nan 0.1000 0.0000
420 0.0000 nan 0.1000 -0.0000
440 0.0000 nan 0.1000 -0.0000
460 0.0000 nan 0.1000 -0.0000
480 0.0000 nan 0.1000 -0.0000
500 0.0000 nan 0.1000 -0.0000
Using 100 trees...
Using 100 trees...
Warning: There were missing values in resampled performance measures.Warning: missing values found in aggregated results
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.3318 nan 0.1000 0.0124
2 1.2923 nan 0.1000 0.0148
3 1.2593 nan 0.1000 0.0049
4 1.2365 nan 0.1000 0.0030
5 1.2123 nan 0.1000 0.0125
6 1.2077 nan 0.1000 -0.0059
7 1.1926 nan 0.1000 -0.0024
8 1.1676 nan 0.1000 -0.0019
9 1.1554 nan 0.1000 -0.0011
10 1.1413 nan 0.1000 -0.0013
20 0.9933 nan 0.1000 -0.0041
40 0.8140 nan 0.1000 0.0010
60 0.7007 nan 0.1000 -0.0043
80 0.5858 nan 0.1000 -0.0024
100 0.5170 nan 0.1000 -0.0027
120 0.4552 nan 0.1000 -0.0016
140 0.4030 nan 0.1000 0.0004
160 0.3462 nan 0.1000 -0.0010
180 0.3057 nan 0.1000 -0.0016
200 0.2690 nan 0.1000 -0.0001
220 0.2348 nan 0.1000 0.0005
240 0.2067 nan 0.1000 -0.0010
260 0.1830 nan 0.1000 -0.0013
280 0.1650 nan 0.1000 -0.0010
300 0.1454 nan 0.1000 -0.0011
320 0.1276 nan 0.1000 -0.0005
340 0.1136 nan 0.1000 -0.0004
360 0.1022 nan 0.1000 -0.0007
380 0.0898 nan 0.1000 -0.0007
400 0.0802 nan 0.1000 -0.0006
420 0.0700 nan 0.1000 -0.0000
440 0.0624 nan 0.1000 -0.0002
460 0.0557 nan 0.1000 -0.0002
480 0.0496 nan 0.1000 -0.0002
500 0.0439 nan 0.1000 -0.0001
Using 100 trees...
Using 100 trees...
Warning: There were missing values in resampled performance measures.Warning: missing values found in aggregated results
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.3466 nan 0.1000 0.0083
2 1.3232 nan 0.1000 0.0073
3 1.2755 nan 0.1000 0.0103
4 1.2409 nan 0.1000 0.0092
5 1.2080 nan 0.1000 0.0031
6 1.1875 nan 0.1000 0.0002
7 1.1681 nan 0.1000 -0.0041
8 1.1462 nan 0.1000 0.0033
9 1.1219 nan 0.1000 0.0019
10 1.0909 nan 0.1000 0.0059
20 0.9208 nan 0.1000 -0.0054
40 0.6558 nan 0.1000 -0.0054
60 0.5011 nan 0.1000 -0.0002
80 0.3916 nan 0.1000 -0.0016
100 0.3172 nan 0.1000 -0.0032
120 0.2574 nan 0.1000 -0.0003
140 0.2066 nan 0.1000 -0.0024
160 0.1695 nan 0.1000 -0.0004
180 0.1348 nan 0.1000 0.0004
200 0.1085 nan 0.1000 -0.0007
Using 100 trees...
Using 100 trees...
Warning: There were missing values in resampled performance measures.Warning: missing values found in aggregated results
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.3445 nan 0.1000 0.0077
2 1.3293 nan 0.1000 -0.0055
3 1.2961 nan 0.1000 0.0121
4 1.2660 nan 0.1000 0.0056
5 1.2317 nan 0.1000 0.0137
6 1.2015 nan 0.1000 0.0085
7 1.1794 nan 0.1000 0.0040
8 1.1620 nan 0.1000 0.0007
9 1.1504 nan 0.1000 -0.0029
10 1.1377 nan 0.1000 0.0001
20 0.9625 nan 0.1000 0.0010
40 0.7693 nan 0.1000 -0.0000
60 0.6309 nan 0.1000 -0.0014
80 0.5340 nan 0.1000 -0.0003
100 0.4507 nan 0.1000 -0.0015
120 0.3716 nan 0.1000 -0.0006
140 0.3119 nan 0.1000 -0.0012
160 0.2687 nan 0.1000 -0.0018
180 0.2315 nan 0.1000 -0.0015
200 0.1976 nan 0.1000 -0.0008
Using 100 trees...
Using 100 trees...
Warning: There were missing values in resampled performance measures.Warning: missing values found in aggregated results
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.3050 nan 0.1000 0.0222
2 1.2206 nan 0.1000 0.0333
3 1.1574 nan 0.1000 0.0150
4 1.1119 nan 0.1000 0.0055
5 1.0518 nan 0.1000 0.0155
6 0.9941 nan 0.1000 0.0122
7 0.9370 nan 0.1000 0.0137
8 0.8946 nan 0.1000 0.0045
9 0.8537 nan 0.1000 0.0065
10 0.8113 nan 0.1000 0.0081
20 0.5506 nan 0.1000 0.0020
40 0.2561 nan 0.1000 -0.0033
60 0.1443 nan 0.1000 -0.0019
80 0.0804 nan 0.1000 -0.0005
100 0.0438 nan 0.1000 -0.0004
120 0.0275 nan 0.1000 -0.0003
140 0.0143 nan 0.1000 -0.0003
160 0.0110 nan 0.1000 -0.0005
180 0.0073 nan 0.1000 -0.0004
200 0.0032 nan 0.1000 -0.0000
Using 100 trees...
Using 100 trees...
Warning: There were missing values in resampled performance measures.Warning: missing values found in aggregated results
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.3328 nan 0.1000 0.0077
2 1.2347 nan 0.1000 0.0247
3 1.1658 nan 0.1000 0.0185
4 1.1056 nan 0.1000 0.0233
5 1.0569 nan 0.1000 0.0070
6 1.0102 nan 0.1000 0.0095
7 0.9664 nan 0.1000 0.0101
8 0.9295 nan 0.1000 0.0088
9 0.8905 nan 0.1000 0.0087
10 0.8568 nan 0.1000 0.0069
20 0.5916 nan 0.1000 0.0012
40 0.3128 nan 0.1000 -0.0012
60 0.1781 nan 0.1000 -0.0003
80 0.1032 nan 0.1000 -0.0001
100 0.0639 nan 0.1000 -0.0006
Using 100 trees...
Using 100 trees...
Warning: There were missing values in resampled performance measures.Warning: missing values found in aggregated results
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.3379 nan 0.1000 0.0098
2 1.2789 nan 0.1000 0.0206
3 1.2369 nan 0.1000 0.0051
4 1.1947 nan 0.1000 0.0058
5 1.1650 nan 0.1000 0.0054
6 1.1103 nan 0.1000 0.0139
7 1.0885 nan 0.1000 -0.0034
8 1.0631 nan 0.1000 -0.0001
9 1.0368 nan 0.1000 0.0038
10 1.0012 nan 0.1000 0.0089
20 0.8001 nan 0.1000 0.0024
40 0.5484 nan 0.1000 0.0037
60 0.4204 nan 0.1000 -0.0028
80 0.3214 nan 0.1000 -0.0000
100 0.2467 nan 0.1000 -0.0007
120 0.1907 nan 0.1000 -0.0001
140 0.1516 nan 0.1000 -0.0007
160 0.1203 nan 0.1000 -0.0005
180 0.0959 nan 0.1000 -0.0006
200 0.0746 nan 0.1000 -0.0002
Using 100 trees...
Using 100 trees...
Warning: There were missing values in resampled performance measures.Warning: missing values found in aggregated results
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.2767 nan 0.1000 0.0331
2 1.1989 nan 0.1000 0.0257
3 1.1590 nan 0.1000 -0.0011
4 1.1010 nan 0.1000 0.0179
5 1.0480 nan 0.1000 0.0175
6 1.0151 nan 0.1000 -0.0002
7 0.9757 nan 0.1000 0.0034
8 0.9473 nan 0.1000 0.0000
9 0.8983 nan 0.1000 0.0181
10 0.8509 nan 0.1000 0.0080
20 0.6015 nan 0.1000 -0.0039
40 0.3143 nan 0.1000 -0.0002
60 0.1762 nan 0.1000 0.0003
80 0.1088 nan 0.1000 0.0000
100 0.0666 nan 0.1000 -0.0002
Using 100 trees...
Using 100 trees...
Warning: There were missing values in resampled performance measures.Warning: missing values found in aggregated results
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.3550 nan 0.1000 0.0037
2 1.3252 nan 0.1000 0.0113
3 1.2963 nan 0.1000 0.0104
4 1.2744 nan 0.1000 0.0011
5 1.2504 nan 0.1000 0.0039
6 1.2327 nan 0.1000 0.0004
7 1.2141 nan 0.1000 -0.0027
8 1.1885 nan 0.1000 0.0106
9 1.1731 nan 0.1000 0.0004
10 1.1538 nan 0.1000 0.0065
20 1.0058 nan 0.1000 -0.0000
40 0.7908 nan 0.1000 -0.0020
60 0.6427 nan 0.1000 -0.0008
80 0.5434 nan 0.1000 0.0020
100 0.4527 nan 0.1000 -0.0017
120 0.3751 nan 0.1000 -0.0031
140 0.3146 nan 0.1000 0.0004
160 0.2635 nan 0.1000 0.0001
180 0.2199 nan 0.1000 0.0004
200 0.1896 nan 0.1000 -0.0007
Using 100 trees...
Using 100 trees...
Warning: There were missing values in resampled performance measures.Warning: missing values found in aggregated results
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.2968 nan 0.1000 0.0205
2 1.2346 nan 0.1000 0.0110
3 1.1779 nan 0.1000 0.0132
4 1.1277 nan 0.1000 0.0061
5 1.0871 nan 0.1000 0.0067
6 1.0367 nan 0.1000 0.0169
7 0.9847 nan 0.1000 0.0150
8 0.9209 nan 0.1000 0.0231
9 0.8891 nan 0.1000 0.0062
10 0.8402 nan 0.1000 0.0120
20 0.5809 nan 0.1000 0.0039
40 0.3290 nan 0.1000 -0.0010
60 0.1966 nan 0.1000 0.0033
80 0.1147 nan 0.1000 -0.0001
100 0.0688 nan 0.1000 0.0006
Using 100 trees...
Using 100 trees...
# ROC curve
roc_c <- roc_curve(gbm_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["gbm_model"]][[model_name]] <- gbm_model$model_summary
supplements_models[["roc_cs"]][["gbm_model"]][[model_name]] <- gbm_model$kfold_rocobjs
# see the results
gbm_model$model_summary %>% t()
[,1]
n.trees 100.0000000
interaction.depth 5.0000000
shrinkage 0.1000000
n.minobsinnode 10.0000000
auc 1.0000000
auc_optimism_corrected 0.5219912
auc_optimism_corrected_CIL 0.4425772
auc_optimism_corrected_CIU 0.6077597
accuracy 1.0000000
accuracy_optimism_corrected 0.5170581
accuracy_optimism_corrected_CIL 0.4515944
accuracy_optimism_corrected_CIU 0.5991346
roc_c
pre_ltx vs post_ltx
group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)
# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
ileum_genus_taxa_tab,
ileum_metadata,
group,
usage="ml_clr")
Removing 46 ASV(s)
Removing 6 ASV(s)
# fit the model
gbm_model <- gbm_binomial(filt_ileum_uni_data,
sample_method = "atypboot",
outcome="Group",
N=10,
reuse=FALSE,
file=model_name,
Q="Q1",
overfitting_check = TRUE)
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.2683 nan 0.1000 0.0007
2 1.2487 nan 0.1000 0.0022
3 1.2406 nan 0.1000 -0.0010
4 1.2289 nan 0.1000 0.0015
5 1.2171 nan 0.1000 -0.0055
6 1.2040 nan 0.1000 -0.0032
7 1.1910 nan 0.1000 -0.0069
8 1.1722 nan 0.1000 0.0021
9 1.1558 nan 0.1000 0.0009
10 1.1412 nan 0.1000 0.0032
20 1.0331 nan 0.1000 -0.0027
40 0.8764 nan 0.1000 -0.0012
60 0.7330 nan 0.1000 -0.0009
80 0.6215 nan 0.1000 0.0009
100 0.5411 nan 0.1000 -0.0016
Using 100 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.2167 nan 0.1000 0.0277
2 1.1840 nan 0.1000 0.0110
3 1.1178 nan 0.1000 0.0184
4 1.0918 nan 0.1000 0.0044
5 1.0576 nan 0.1000 0.0081
6 1.0328 nan 0.1000 0.0038
7 0.9937 nan 0.1000 0.0120
8 0.9576 nan 0.1000 0.0098
9 0.9207 nan 0.1000 0.0004
10 0.8869 nan 0.1000 0.0123
20 0.6769 nan 0.1000 -0.0005
40 0.4068 nan 0.1000 0.0015
60 0.2542 nan 0.1000 0.0005
80 0.1628 nan 0.1000 0.0001
100 0.1080 nan 0.1000 0.0001
120 0.0734 nan 0.1000 -0.0005
140 0.0503 nan 0.1000 -0.0001
160 0.0357 nan 0.1000 -0.0003
180 0.0265 nan 0.1000 -0.0002
200 0.0189 nan 0.1000 -0.0002
220 0.0137 nan 0.1000 0.0000
240 0.0099 nan 0.1000 -0.0002
260 0.0072 nan 0.1000 -0.0001
280 0.0051 nan 0.1000 -0.0001
300 0.0037 nan 0.1000 0.0000
320 0.0026 nan 0.1000 -0.0000
340 0.0017 nan 0.1000 -0.0000
360 0.0011 nan 0.1000 -0.0000
380 0.0009 nan 0.1000 -0.0000
400 0.0006 nan 0.1000 -0.0000
420 0.0004 nan 0.1000 -0.0000
440 0.0003 nan 0.1000 0.0000
460 0.0002 nan 0.1000 -0.0000
480 0.0002 nan 0.1000 -0.0000
500 0.0001 nan 0.1000 -0.0000
Using 100 trees...
Using 100 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.2583 nan 0.1000 0.0185
2 1.2003 nan 0.1000 0.0212
3 1.1318 nan 0.1000 0.0182
4 1.0666 nan 0.1000 0.0172
5 1.0226 nan 0.1000 0.0057
6 0.9795 nan 0.1000 0.0149
7 0.9323 nan 0.1000 0.0117
8 0.8840 nan 0.1000 0.0141
9 0.8362 nan 0.1000 0.0166
10 0.7890 nan 0.1000 0.0127
20 0.5348 nan 0.1000 0.0023
40 0.2566 nan 0.1000 -0.0008
60 0.1357 nan 0.1000 -0.0000
80 0.0702 nan 0.1000 -0.0001
100 0.0386 nan 0.1000 0.0002
120 0.0216 nan 0.1000 0.0000
140 0.0124 nan 0.1000 0.0001
160 0.0071 nan 0.1000 -0.0000
180 0.0040 nan 0.1000 0.0000
200 0.0022 nan 0.1000 -0.0000
220 0.0012 nan 0.1000 -0.0000
240 0.0007 nan 0.1000 0.0000
260 0.0004 nan 0.1000 0.0000
280 0.0002 nan 0.1000 0.0000
300 0.0001 nan 0.1000 -0.0000
320 0.0001 nan 0.1000 0.0000
340 0.0000 nan 0.1000 0.0000
360 0.0000 nan 0.1000 0.0000
380 0.0000 nan 0.1000 -0.0000
400 0.0000 nan 0.1000 -0.0000
420 0.0000 nan 0.1000 -0.0000
440 0.0000 nan 0.1000 -0.0000
460 0.0000 nan 0.1000 0.0000
480 0.0000 nan 0.1000 -0.0000
500 0.0000 nan 0.1000 -0.0000
Using 100 trees...
Using 100 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.2990 nan 0.1000 0.0048
2 1.2667 nan 0.1000 0.0127
3 1.2462 nan 0.1000 0.0053
4 1.2217 nan 0.1000 0.0090
5 1.1995 nan 0.1000 0.0044
6 1.1829 nan 0.1000 0.0021
7 1.1694 nan 0.1000 0.0022
8 1.1531 nan 0.1000 0.0058
9 1.1344 nan 0.1000 0.0054
10 1.1142 nan 0.1000 0.0073
20 0.9741 nan 0.1000 0.0005
40 0.7976 nan 0.1000 -0.0005
60 0.6656 nan 0.1000 0.0013
80 0.5650 nan 0.1000 -0.0023
100 0.4863 nan 0.1000 -0.0010
120 0.4236 nan 0.1000 -0.0020
140 0.3707 nan 0.1000 -0.0002
160 0.3286 nan 0.1000 -0.0018
180 0.2904 nan 0.1000 -0.0014
200 0.2595 nan 0.1000 -0.0003
Using 100 trees...
Using 100 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.2211 nan 0.1000 0.0299
2 1.1691 nan 0.1000 0.0139
3 1.1228 nan 0.1000 0.0053
4 1.0758 nan 0.1000 0.0129
5 1.0275 nan 0.1000 0.0084
6 0.9848 nan 0.1000 0.0081
7 0.9409 nan 0.1000 0.0088
8 0.9016 nan 0.1000 0.0094
9 0.8621 nan 0.1000 0.0063
10 0.8168 nan 0.1000 0.0106
20 0.5450 nan 0.1000 -0.0008
40 0.2750 nan 0.1000 -0.0006
60 0.1469 nan 0.1000 -0.0001
80 0.0843 nan 0.1000 -0.0009
100 0.0481 nan 0.1000 -0.0002
120 0.0276 nan 0.1000 0.0001
140 0.0163 nan 0.1000 -0.0001
160 0.0098 nan 0.1000 -0.0000
180 0.0063 nan 0.1000 -0.0000
200 0.0037 nan 0.1000 -0.0000
220 0.0024 nan 0.1000 -0.0000
240 0.0015 nan 0.1000 -0.0000
260 0.0009 nan 0.1000 -0.0000
280 0.0006 nan 0.1000 -0.0000
300 0.0003 nan 0.1000 -0.0000
320 0.0002 nan 0.1000 0.0000
340 0.0002 nan 0.1000 -0.0000
360 0.0001 nan 0.1000 -0.0000
380 0.0001 nan 0.1000 -0.0000
400 0.0000 nan 0.1000 -0.0000
420 0.0000 nan 0.1000 -0.0000
440 0.0000 nan 0.1000 -0.0000
460 0.0000 nan 0.1000 -0.0000
480 0.0000 nan 0.1000 -0.0000
500 0.0000 nan 0.1000 0.0000
Using 100 trees...
Using 100 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.2620 nan 0.1000 0.0219
2 1.1922 nan 0.1000 0.0262
3 1.1480 nan 0.1000 0.0108
4 1.1156 nan 0.1000 0.0039
5 1.0639 nan 0.1000 0.0232
6 1.0185 nan 0.1000 0.0145
7 0.9782 nan 0.1000 0.0104
8 0.9542 nan 0.1000 0.0001
9 0.9263 nan 0.1000 0.0040
10 0.8850 nan 0.1000 0.0122
20 0.6470 nan 0.1000 0.0005
40 0.3734 nan 0.1000 0.0030
60 0.2337 nan 0.1000 0.0004
80 0.1439 nan 0.1000 -0.0006
100 0.0909 nan 0.1000 0.0000
Using 100 trees...
Using 100 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.2695 nan 0.1000 0.0110
2 1.2097 nan 0.1000 0.0165
3 1.1706 nan 0.1000 0.0136
4 1.1297 nan 0.1000 0.0106
5 1.0905 nan 0.1000 0.0033
6 1.0620 nan 0.1000 0.0053
7 1.0329 nan 0.1000 0.0027
8 1.0020 nan 0.1000 0.0016
9 0.9858 nan 0.1000 0.0020
10 0.9527 nan 0.1000 0.0110
20 0.7379 nan 0.1000 -0.0015
40 0.4749 nan 0.1000 -0.0004
60 0.3318 nan 0.1000 -0.0006
80 0.2278 nan 0.1000 0.0007
100 0.1613 nan 0.1000 0.0002
Using 100 trees...
Using 100 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.2097 nan 0.1000 0.0101
2 1.1706 nan 0.1000 0.0105
3 1.1542 nan 0.1000 -0.0015
4 1.1251 nan 0.1000 0.0095
5 1.1017 nan 0.1000 0.0036
6 1.0720 nan 0.1000 0.0012
7 1.0367 nan 0.1000 0.0057
8 1.0179 nan 0.1000 0.0000
9 0.9916 nan 0.1000 0.0062
10 0.9616 nan 0.1000 0.0101
20 0.7469 nan 0.1000 -0.0027
40 0.4640 nan 0.1000 0.0007
60 0.3184 nan 0.1000 -0.0012
80 0.2080 nan 0.1000 -0.0007
100 0.1463 nan 0.1000 -0.0010
Using 100 trees...
Using 100 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.2819 nan 0.1000 0.0033
2 1.2291 nan 0.1000 0.0125
3 1.1768 nan 0.1000 0.0156
4 1.1117 nan 0.1000 0.0215
5 1.0669 nan 0.1000 0.0117
6 1.0289 nan 0.1000 0.0112
7 0.9915 nan 0.1000 0.0069
8 0.9519 nan 0.1000 0.0097
9 0.9083 nan 0.1000 0.0144
10 0.8814 nan 0.1000 0.0027
20 0.6689 nan 0.1000 0.0010
40 0.3905 nan 0.1000 -0.0015
60 0.2519 nan 0.1000 -0.0001
80 0.1674 nan 0.1000 -0.0006
100 0.1143 nan 0.1000 -0.0007
Using 100 trees...
Using 100 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.2236 nan 0.1000 0.0151
2 1.1714 nan 0.1000 0.0139
3 1.1097 nan 0.1000 0.0182
4 1.0561 nan 0.1000 0.0155
5 1.0134 nan 0.1000 0.0139
6 0.9758 nan 0.1000 0.0086
7 0.9354 nan 0.1000 0.0135
8 0.9011 nan 0.1000 0.0114
9 0.8733 nan 0.1000 0.0066
10 0.8378 nan 0.1000 0.0097
20 0.6090 nan 0.1000 0.0000
40 0.3542 nan 0.1000 0.0016
60 0.2241 nan 0.1000 -0.0013
80 0.1508 nan 0.1000 -0.0009
100 0.1035 nan 0.1000 -0.0005
Using 100 trees...
Using 100 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.2612 nan 0.1000 0.0137
2 1.2288 nan 0.1000 0.0061
3 1.1939 nan 0.1000 0.0109
4 1.1832 nan 0.1000 -0.0027
5 1.1677 nan 0.1000 0.0032
6 1.1355 nan 0.1000 0.0061
7 1.1135 nan 0.1000 0.0029
8 1.0922 nan 0.1000 0.0018
9 1.0709 nan 0.1000 -0.0002
10 1.0464 nan 0.1000 0.0081
20 0.8811 nan 0.1000 0.0017
40 0.6671 nan 0.1000 -0.0013
60 0.5276 nan 0.1000 0.0014
80 0.4093 nan 0.1000 -0.0004
100 0.3291 nan 0.1000 -0.0005
120 0.2686 nan 0.1000 -0.0014
140 0.2156 nan 0.1000 -0.0012
160 0.1746 nan 0.1000 0.0000
180 0.1407 nan 0.1000 -0.0002
200 0.1190 nan 0.1000 -0.0002
220 0.0972 nan 0.1000 -0.0004
240 0.0802 nan 0.1000 -0.0002
260 0.0660 nan 0.1000 -0.0000
280 0.0557 nan 0.1000 -0.0006
300 0.0471 nan 0.1000 -0.0005
320 0.0390 nan 0.1000 -0.0002
340 0.0322 nan 0.1000 -0.0002
360 0.0266 nan 0.1000 -0.0002
380 0.0220 nan 0.1000 -0.0000
400 0.0182 nan 0.1000 -0.0000
420 0.0152 nan 0.1000 -0.0000
440 0.0122 nan 0.1000 -0.0002
460 0.0101 nan 0.1000 -0.0000
480 0.0085 nan 0.1000 -0.0001
500 0.0069 nan 0.1000 -0.0000
Using 100 trees...
Using 100 trees...
# ROC curve
roc_c <- roc_curve(gbm_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["gbm_model"]][[model_name]] <- gbm_model$model_summary
supplements_models[["roc_cs"]][["gbm_model"]][[model_name]] <- gbm_model$kfold_rocobjs
# see the results
gbm_model$model_summary %>% t()
[,1]
n.trees 100.0000000
interaction.depth 3.0000000
shrinkage 0.1000000
n.minobsinnode 30.0000000
auc 0.9919255
auc_optimism_corrected 0.5826131
auc_optimism_corrected_CIL 0.5518379
auc_optimism_corrected_CIU 0.6214075
accuracy 0.9326923
accuracy_optimism_corrected 0.6621439
accuracy_optimism_corrected_CIL 0.6152162
accuracy_optimism_corrected_CIU 0.7468750
roc_c
post_ltx vs healthy
group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)
# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
ileum_genus_taxa_tab,
ileum_metadata,
group,
usage="ml_clr")
Removing 45 ASV(s)
Removing 2 ASV(s)
# fit the model
gbm_model <- gbm_binomial(filt_ileum_uni_data,
sample_method = "atypboot",
outcome="Group",
N=10,
reuse=FALSE,
file=model_name,
Q="Q1",
overfitting_check = TRUE)
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.2838 nan 0.1000 -0.0092
2 1.2774 nan 0.1000 -0.0044
3 1.2723 nan 0.1000 -0.0032
4 1.2650 nan 0.1000 0.0012
5 1.2556 nan 0.1000 -0.0016
6 1.2479 nan 0.1000 -0.0021
7 1.2407 nan 0.1000 -0.0015
8 1.2367 nan 0.1000 -0.0047
9 1.2325 nan 0.1000 -0.0032
10 1.2240 nan 0.1000 -0.0001
20 1.1738 nan 0.1000 -0.0030
40 1.0973 nan 0.1000 -0.0018
60 1.0121 nan 0.1000 -0.0048
80 0.9433 nan 0.1000 -0.0043
100 0.8845 nan 0.1000 -0.0026
Using 100 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.2914 nan 0.1000 0.0012
2 1.2741 nan 0.1000 0.0004
3 1.2569 nan 0.1000 0.0027
4 1.2424 nan 0.1000 -0.0007
5 1.2272 nan 0.1000 0.0012
6 1.2089 nan 0.1000 0.0058
7 1.1903 nan 0.1000 0.0020
8 1.1759 nan 0.1000 0.0030
9 1.1634 nan 0.1000 0.0009
10 1.1491 nan 0.1000 -0.0009
20 1.0331 nan 0.1000 -0.0003
40 0.8678 nan 0.1000 -0.0023
60 0.7422 nan 0.1000 -0.0011
80 0.6361 nan 0.1000 -0.0005
100 0.5459 nan 0.1000 -0.0006
120 0.4770 nan 0.1000 0.0000
140 0.4151 nan 0.1000 -0.0007
160 0.3666 nan 0.1000 -0.0011
180 0.3220 nan 0.1000 -0.0021
200 0.2865 nan 0.1000 -0.0009
220 0.2540 nan 0.1000 -0.0014
240 0.2239 nan 0.1000 0.0000
260 0.1971 nan 0.1000 -0.0005
280 0.1785 nan 0.1000 -0.0007
300 0.1614 nan 0.1000 -0.0001
320 0.1452 nan 0.1000 -0.0006
340 0.1287 nan 0.1000 -0.0007
360 0.1130 nan 0.1000 -0.0000
380 0.1011 nan 0.1000 -0.0005
400 0.0908 nan 0.1000 -0.0002
420 0.0820 nan 0.1000 -0.0001
440 0.0737 nan 0.1000 -0.0000
460 0.0655 nan 0.1000 -0.0001
480 0.0593 nan 0.1000 -0.0002
500 0.0535 nan 0.1000 -0.0005
Using 100 trees...
Using 100 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.2560 nan 0.1000 0.0018
2 1.2010 nan 0.1000 0.0111
3 1.1487 nan 0.1000 0.0056
4 1.0803 nan 0.1000 0.0277
5 1.0411 nan 0.1000 0.0032
6 0.9953 nan 0.1000 0.0107
7 0.9584 nan 0.1000 0.0017
8 0.9247 nan 0.1000 0.0054
9 0.8874 nan 0.1000 0.0027
10 0.8496 nan 0.1000 0.0133
20 0.5815 nan 0.1000 0.0011
40 0.2876 nan 0.1000 0.0011
60 0.1582 nan 0.1000 -0.0002
80 0.0873 nan 0.1000 -0.0007
100 0.0500 nan 0.1000 -0.0003
120 0.0297 nan 0.1000 -0.0004
140 0.0175 nan 0.1000 -0.0002
160 0.0106 nan 0.1000 -0.0000
180 0.0060 nan 0.1000 0.0000
200 0.0036 nan 0.1000 0.0000
Using 100 trees...
Using 100 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.3011 nan 0.1000 0.0009
2 1.2808 nan 0.1000 0.0095
3 1.2662 nan 0.1000 0.0002
4 1.2465 nan 0.1000 0.0026
5 1.2276 nan 0.1000 0.0073
6 1.2098 nan 0.1000 0.0023
7 1.1957 nan 0.1000 0.0042
8 1.1856 nan 0.1000 -0.0011
9 1.1712 nan 0.1000 0.0029
10 1.1607 nan 0.1000 -0.0005
20 1.0600 nan 0.1000 -0.0012
40 0.8825 nan 0.1000 -0.0016
60 0.7629 nan 0.1000 -0.0001
80 0.6741 nan 0.1000 -0.0008
100 0.6018 nan 0.1000 -0.0006
120 0.5435 nan 0.1000 -0.0027
140 0.4800 nan 0.1000 -0.0002
160 0.4297 nan 0.1000 -0.0010
180 0.3907 nan 0.1000 -0.0015
200 0.3541 nan 0.1000 -0.0011
Using 100 trees...
Using 100 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.2446 nan 0.1000 0.0024
2 1.2079 nan 0.1000 0.0043
3 1.1670 nan 0.1000 0.0029
4 1.1338 nan 0.1000 0.0036
5 1.0952 nan 0.1000 0.0076
6 1.0654 nan 0.1000 0.0010
7 1.0328 nan 0.1000 0.0080
8 0.9991 nan 0.1000 0.0085
9 0.9726 nan 0.1000 0.0055
10 0.9547 nan 0.1000 0.0001
20 0.7570 nan 0.1000 0.0017
40 0.5173 nan 0.1000 0.0005
60 0.3630 nan 0.1000 -0.0006
80 0.2561 nan 0.1000 0.0001
100 0.1842 nan 0.1000 -0.0004
120 0.1374 nan 0.1000 -0.0002
140 0.1022 nan 0.1000 -0.0008
160 0.0745 nan 0.1000 -0.0008
180 0.0562 nan 0.1000 0.0001
200 0.0412 nan 0.1000 -0.0001
220 0.0311 nan 0.1000 -0.0002
240 0.0236 nan 0.1000 -0.0001
260 0.0179 nan 0.1000 -0.0000
280 0.0136 nan 0.1000 0.0000
300 0.0105 nan 0.1000 -0.0001
320 0.0076 nan 0.1000 -0.0000
340 0.0056 nan 0.1000 -0.0000
360 0.0041 nan 0.1000 -0.0000
380 0.0033 nan 0.1000 -0.0001
400 0.0024 nan 0.1000 -0.0000
420 0.0019 nan 0.1000 -0.0000
440 0.0015 nan 0.1000 -0.0000
460 0.0011 nan 0.1000 -0.0000
480 0.0009 nan 0.1000 -0.0000
500 0.0006 nan 0.1000 -0.0000
Using 100 trees...
Using 100 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.1361 nan 0.1000 0.0015
2 1.1205 nan 0.1000 0.0028
3 1.0984 nan 0.1000 0.0067
4 1.0881 nan 0.1000 0.0009
5 1.0773 nan 0.1000 -0.0082
6 1.0613 nan 0.1000 -0.0000
7 1.0514 nan 0.1000 -0.0013
8 1.0364 nan 0.1000 0.0013
9 1.0286 nan 0.1000 -0.0024
10 1.0161 nan 0.1000 0.0034
20 0.9200 nan 0.1000 0.0020
40 0.7681 nan 0.1000 -0.0001
60 0.6444 nan 0.1000 -0.0004
80 0.5666 nan 0.1000 -0.0006
100 0.5010 nan 0.1000 0.0002
120 0.4411 nan 0.1000 -0.0016
140 0.3914 nan 0.1000 0.0000
160 0.3467 nan 0.1000 -0.0011
180 0.3076 nan 0.1000 -0.0008
200 0.2743 nan 0.1000 -0.0014
Using 100 trees...
Using 100 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.2733 nan 0.1000 0.0099
2 1.2333 nan 0.1000 0.0030
3 1.1992 nan 0.1000 0.0067
4 1.1519 nan 0.1000 0.0170
5 1.1209 nan 0.1000 0.0022
6 1.0877 nan 0.1000 0.0082
7 1.0618 nan 0.1000 0.0032
8 1.0270 nan 0.1000 0.0063
9 1.0048 nan 0.1000 0.0005
10 0.9782 nan 0.1000 0.0047
20 0.7707 nan 0.1000 -0.0061
40 0.4897 nan 0.1000 0.0004
60 0.3420 nan 0.1000 0.0004
80 0.2433 nan 0.1000 0.0005
100 0.1758 nan 0.1000 -0.0017
120 0.1282 nan 0.1000 -0.0007
140 0.0933 nan 0.1000 -0.0003
160 0.0687 nan 0.1000 -0.0005
180 0.0508 nan 0.1000 0.0001
200 0.0377 nan 0.1000 -0.0002
Using 100 trees...
Using 100 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.2729 nan 0.1000 0.0045
2 1.2357 nan 0.1000 0.0099
3 1.1916 nan 0.1000 0.0051
4 1.1456 nan 0.1000 0.0141
5 1.0914 nan 0.1000 0.0140
6 1.0637 nan 0.1000 0.0011
7 1.0433 nan 0.1000 -0.0064
8 1.0239 nan 0.1000 0.0037
9 1.0016 nan 0.1000 0.0036
10 0.9647 nan 0.1000 0.0053
20 0.7634 nan 0.1000 -0.0030
40 0.5029 nan 0.1000 0.0006
60 0.3518 nan 0.1000 -0.0021
80 0.2450 nan 0.1000 0.0008
100 0.1738 nan 0.1000 -0.0000
Using 100 trees...
Using 100 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.2799 nan 0.1000 0.0078
2 1.2434 nan 0.1000 0.0065
3 1.1956 nan 0.1000 0.0144
4 1.1689 nan 0.1000 0.0028
5 1.1360 nan 0.1000 0.0056
6 1.1078 nan 0.1000 0.0038
7 1.0664 nan 0.1000 0.0118
8 1.0307 nan 0.1000 0.0056
9 1.0070 nan 0.1000 0.0035
10 0.9771 nan 0.1000 0.0036
20 0.7758 nan 0.1000 0.0005
40 0.5067 nan 0.1000 -0.0045
60 0.3358 nan 0.1000 -0.0022
80 0.2340 nan 0.1000 0.0000
100 0.1689 nan 0.1000 -0.0003
120 0.1175 nan 0.1000 0.0001
140 0.0841 nan 0.1000 0.0003
160 0.0611 nan 0.1000 -0.0002
180 0.0445 nan 0.1000 -0.0001
200 0.0318 nan 0.1000 0.0001
220 0.0228 nan 0.1000 0.0001
240 0.0169 nan 0.1000 -0.0001
260 0.0131 nan 0.1000 -0.0001
280 0.0096 nan 0.1000 -0.0001
300 0.0070 nan 0.1000 -0.0000
320 0.0050 nan 0.1000 -0.0000
340 0.0036 nan 0.1000 -0.0000
360 0.0026 nan 0.1000 -0.0000
380 0.0019 nan 0.1000 0.0000
400 0.0014 nan 0.1000 -0.0000
420 0.0010 nan 0.1000 0.0000
440 0.0008 nan 0.1000 -0.0000
460 0.0006 nan 0.1000 0.0000
480 0.0004 nan 0.1000 -0.0000
500 0.0003 nan 0.1000 0.0000
Using 100 trees...
Using 100 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.2917 nan 0.1000 -0.0021
2 1.2695 nan 0.1000 0.0035
3 1.2550 nan 0.1000 -0.0022
4 1.2303 nan 0.1000 0.0067
5 1.2188 nan 0.1000 -0.0001
6 1.2013 nan 0.1000 0.0065
7 1.1897 nan 0.1000 0.0003
8 1.1813 nan 0.1000 -0.0002
9 1.1723 nan 0.1000 -0.0010
10 1.1569 nan 0.1000 0.0029
20 1.0445 nan 0.1000 -0.0035
40 0.8863 nan 0.1000 0.0026
60 0.7665 nan 0.1000 -0.0010
80 0.6686 nan 0.1000 -0.0009
100 0.5774 nan 0.1000 0.0004
120 0.5135 nan 0.1000 0.0001
140 0.4501 nan 0.1000 -0.0014
160 0.3959 nan 0.1000 -0.0004
180 0.3514 nan 0.1000 -0.0005
200 0.3120 nan 0.1000 -0.0004
220 0.2815 nan 0.1000 -0.0008
240 0.2518 nan 0.1000 -0.0021
260 0.2271 nan 0.1000 -0.0011
280 0.2069 nan 0.1000 -0.0003
300 0.1844 nan 0.1000 -0.0004
320 0.1670 nan 0.1000 -0.0003
340 0.1504 nan 0.1000 -0.0002
360 0.1343 nan 0.1000 -0.0002
380 0.1214 nan 0.1000 -0.0006
400 0.1087 nan 0.1000 0.0002
420 0.0978 nan 0.1000 -0.0003
440 0.0872 nan 0.1000 0.0001
460 0.0774 nan 0.1000 -0.0001
480 0.0698 nan 0.1000 -0.0001
500 0.0639 nan 0.1000 -0.0001
Using 100 trees...
Using 100 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.2467 nan 0.1000 0.0058
2 1.2133 nan 0.1000 -0.0001
3 1.1780 nan 0.1000 0.0058
4 1.1482 nan 0.1000 0.0065
5 1.1296 nan 0.1000 -0.0015
6 1.1008 nan 0.1000 0.0067
7 1.0678 nan 0.1000 0.0059
8 1.0484 nan 0.1000 0.0014
9 1.0282 nan 0.1000 0.0021
10 0.9951 nan 0.1000 0.0040
20 0.7597 nan 0.1000 0.0069
40 0.4977 nan 0.1000 -0.0018
60 0.3398 nan 0.1000 -0.0022
80 0.2346 nan 0.1000 -0.0005
100 0.1634 nan 0.1000 -0.0005
120 0.1183 nan 0.1000 -0.0013
140 0.0828 nan 0.1000 -0.0001
160 0.0600 nan 0.1000 -0.0001
180 0.0426 nan 0.1000 -0.0001
200 0.0302 nan 0.1000 -0.0002
Using 100 trees...
Using 100 trees...
# ROC curve
roc_c <- roc_curve(gbm_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["gbm_model"]][[model_name]] <- gbm_model$model_summary
supplements_models[["roc_cs"]][["gbm_model"]][[model_name]] <- gbm_model$kfold_rocobjs
# see the results
gbm_model$model_summary %>% t()
[,1]
n.trees 100.0000000
interaction.depth 1.0000000
shrinkage 0.1000000
n.minobsinnode 30.0000000
auc 0.9542386
auc_optimism_corrected 0.4121876
auc_optimism_corrected_CIL 0.3367588
auc_optimism_corrected_CIU 0.5335903
accuracy 0.8388626
accuracy_optimism_corrected 0.4915539
accuracy_optimism_corrected_CIL 0.3776215
accuracy_optimism_corrected_CIU 0.5788824
roc_c
level="ASV"
pre_ltx vs healthy
group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)
# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
ileum_taxa_tab,
ileum_metadata,
group, usage="ml_ra")
Removing 1598 ASV(s)
Removing 146 ASV(s)
# fit the model
enet_model <- glmnet_binomial(filt_ileum_uni_data,
sample_method = "atypboot",
outcome="Group",
N=10,
reuse=FALSE,
file=model_name,
Q="Q1",
overfitting_check = TRUE)
# ROC curve
roc_c <- roc_curve(enet_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["enet_model_ra"]][[model_name]] <- enet_model$model_summary
supplements_models[["roc_cs"]][["enet_model_ra"]][[model_name]] <- enet_model$kfold_rocobjs
# see the results
enet_model$model_summary %>% t()
[,1]
alpha 0.0000000
lambda 101.8044909
auc 0.9737769
auc_czech 0.9622426
auc_no 0.9884498
auc_optimism_corrected 0.4957353
auc_optimism_corrected_CIL 0.3553380
auc_optimism_corrected_CIU 0.5967854
accuracy 0.5174825
accuracy_czech NaN
accuracy_no 0.4390244
accuracy_optimism_corrected 0.4677574
accuracy_optimism_corrected_CIL 0.3763605
accuracy_optimism_corrected_CIU 0.5596841
roc_c
pre_ltx vs post_ltx
group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)
# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
ileum_taxa_tab,
ileum_metadata,
group, usage="ml_ra")
Removing 979 ASV(s)
Removing 68 ASV(s)
# fit the model
enet_model <- glmnet_binomial(filt_ileum_uni_data,
sample_method = "atypboot",
outcome="Group",
N=10,
reuse=FALSE,
file=model_name,
Q="Q1",
overfitting_check = TRUE)
# ROC curve
roc_c <- roc_curve(enet_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["enet_model_ra"]][[model_name]] <- enet_model$model_summary
supplements_models[["roc_cs"]][["enet_model_ra"]][[model_name]] <- enet_model$kfold_rocobjs
# see the results
enet_model$model_summary %>% t()
[,1]
alpha 0.8000000
lambda 0.1250960
auc 0.5000000
auc_czech 0.5000000
auc_no 0.5000000
auc_optimism_corrected 0.4593353
auc_optimism_corrected_CIL 0.4024036
auc_optimism_corrected_CIU 0.5294085
accuracy 0.6634615
accuracy_czech NaN
accuracy_no 0.6219512
accuracy_optimism_corrected 0.6254869
accuracy_optimism_corrected_CIL 0.5583482
accuracy_optimism_corrected_CIU 0.7086390
roc_c
post_ltx vs healthy
group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)
# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
ileum_taxa_tab,
ileum_metadata,
group, usage="ml_ra")
Removing 641 ASV(s)
Removing 104 ASV(s)
# fit the model
enet_model <- glmnet_binomial(filt_ileum_uni_data,
sample_method = "atypboot",
outcome="Group",
N=10,
reuse=FALSE,
file=model_name,
Q="Q1",
overfitting_check = TRUE)
# ROC curve
roc_c <- roc_curve(enet_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["enet_model_ra"]][[model_name]] <- enet_model$model_summary
supplements_models[["roc_cs"]][["enet_model_ra"]][[model_name]] <- enet_model$kfold_rocobjs
# see the results
enet_model$model_summary %>% t()
[,1]
alpha 0.6000000
lambda 0.1685547
auc 0.5449672
auc_czech 0.5635101
auc_no 0.5139860
auc_optimism_corrected 0.5309733
auc_optimism_corrected_CIL 0.4496319
auc_optimism_corrected_CIU 0.6138510
accuracy 0.6540284
accuracy_czech NaN
accuracy_no 0.5416667
accuracy_optimism_corrected 0.6226971
accuracy_optimism_corrected_CIL 0.5198351
accuracy_optimism_corrected_CIU 0.7231612
roc_c
level="genus"
Aggregate taxa
genus_data <- aggregate_taxa(ileum_asv_tab,
ileum_taxa_tab,
taxonomic_level = level)
ileum_genus_tab <- genus_data[[1]]
ileum_genus_taxa_tab <- genus_data[[2]]
pre_ltx vs healthy
group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)
# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
ileum_genus_taxa_tab,
ileum_metadata,
group,
usage="ml_ra")
Removing 84 ASV(s)
Removing 10 ASV(s)
# fit the model
enet_model <- glmnet_binomial(filt_ileum_uni_data,
sample_method = "atypboot",
outcome="Group",
N=10,
reuse=FALSE,
file=model_name,
Q="Q1",
overfitting_check = TRUE)
# ROC curve
roc_c <- roc_curve(enet_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["enet_model_ra"]][[model_name]] <- enet_model$model_summary
supplements_models[["roc_cs"]][["enet_model_ra"]][[model_name]] <- enet_model$kfold_rocobjs
# see the results
enet_model$model_summary %>% t()
[,1]
alpha 0.0000000
lambda 94.1638485
auc 0.9164384
auc_czech 0.9383117
auc_no 0.8982143
auc_optimism_corrected 0.4387094
auc_optimism_corrected_CIL 0.3321085
auc_optimism_corrected_CIU 0.5234336
accuracy 0.5104895
accuracy_czech NaN
accuracy_no 0.4878049
accuracy_optimism_corrected 0.4098286
accuracy_optimism_corrected_CIL 0.3319182
accuracy_optimism_corrected_CIU 0.4639881
roc_c
pre_ltx vs post_ltx
group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)
# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
ileum_genus_taxa_tab,
ileum_metadata,
group,
usage="ml_ra")
Removing 46 ASV(s)
Removing 6 ASV(s)
# fit the model
enet_model <- glmnet_binomial(filt_ileum_uni_data,
sample_method = "atypboot",
outcome="Group",
N=10,
reuse=FALSE,
file=model_name,
Q="Q1",
overfitting_check = TRUE)
# ROC curve
roc_c <- roc_curve(enet_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["enet_model_ra"]][[model_name]] <- enet_model$model_summary
supplements_models[["roc_cs"]][["enet_model_ra"]][[model_name]] <- enet_model$kfold_rocobjs
# see the results
enet_model$model_summary %>% t()
[,1]
alpha 0.8000000
lambda 0.1280950
auc 0.5000000
auc_czech 0.5000000
auc_no 0.5000000
auc_optimism_corrected 0.4931307
auc_optimism_corrected_CIL 0.4189474
auc_optimism_corrected_CIU 0.6465378
accuracy 0.6634615
accuracy_czech NaN
accuracy_no 0.6219512
accuracy_optimism_corrected 0.6227011
accuracy_optimism_corrected_CIL 0.5655556
accuracy_optimism_corrected_CIU 0.6749188
roc_c
post_ltx vs healthy
group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)
# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
ileum_genus_taxa_tab,
ileum_metadata,
group,
usage="ml_ra")
Removing 45 ASV(s)
Removing 2 ASV(s)
# fit the model
enet_model <- glmnet_binomial(filt_ileum_uni_data,
sample_method = "atypboot",
outcome="Group",
N=10,
reuse=FALSE,
file=model_name,
Q="Q1",
overfitting_check = TRUE)
# ROC curve
roc_c <- roc_curve(enet_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["enet_model_ra"]][[model_name]] <- enet_model$model_summary
supplements_models[["roc_cs"]][["enet_model_ra"]][[model_name]] <- enet_model$kfold_rocobjs
# see the results
enet_model$model_summary %>% t()
[,1]
alpha 0.0000000
lambda 82.0231117
auc 0.9085765
auc_czech 0.9186869
auc_no 0.8749029
auc_optimism_corrected 0.5433459
auc_optimism_corrected_CIL 0.4667255
auc_optimism_corrected_CIU 0.6218420
accuracy 0.6540284
accuracy_czech NaN
accuracy_no 0.5416667
accuracy_optimism_corrected 0.6312833
accuracy_optimism_corrected_CIL 0.5724764
accuracy_optimism_corrected_CIU 0.6932383
roc_c
level="ASV"
pre_ltx vs healthy
group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)
# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
ileum_taxa_tab,
ileum_metadata,
group, usage="ml_ra")
Removing 1598 ASV(s)
Removing 146 ASV(s)
# fit the model
knn_model <- knn_binomial(filt_ileum_uni_data,
sample_method = "atypboot",
outcome="Group",
N=10,
reuse=FALSE,
file=model_name,
Q="Q1",
overfitting_check = TRUE)
# ROC curve
roc_c <- roc_curve(knn_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["knn_model_ra"]][[model_name]] <- knn_model$model_summary
supplements_models[["roc_cs"]][["knn_model_ra"]][[model_name]] <- knn_model$kfold_rocobjs
# see the results
knn_model$model_summary %>% t()
[,1]
k 28.0000000
auc 0.6268102
auc_optimism_corrected 0.4980865
auc_optimism_corrected_CIL 0.3825790
auc_optimism_corrected_CIU 0.6788756
accuracy 0.4965035
accuracy_optimism_corrected 0.4908856
accuracy_optimism_corrected_CIL 0.3876451
accuracy_optimism_corrected_CIU 0.5826531
roc_c
pre_ltx vs post_ltx
group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)
# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
ileum_taxa_tab,
ileum_metadata,
group, usage="ml_ra")
Removing 979 ASV(s)
Removing 68 ASV(s)
# fit the model
knn_model <- knn_binomial(filt_ileum_uni_data,
sample_method = "atypboot",
outcome="Group",
N=10,
reuse=FALSE,
file=model_name,
Q="Q1",
overfitting_check = TRUE)
# ROC curve
roc_c <- roc_curve(knn_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["knn_model_ra"]][[model_name]] <- knn_model$model_summary
supplements_models[["roc_cs"]][["knn_model_ra"]][[model_name]] <- knn_model$kfold_rocobjs
# see the results
knn_model$model_summary %>% t()
[,1]
k 30.0000000
auc 0.6004141
auc_optimism_corrected 0.4773470
auc_optimism_corrected_CIL 0.4016268
auc_optimism_corrected_CIU 0.5291267
accuracy 0.6394231
accuracy_optimism_corrected 0.5723595
accuracy_optimism_corrected_CIL 0.4473894
accuracy_optimism_corrected_CIU 0.6494254
roc_c
post_ltx vs healthy
group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)
# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
ileum_taxa_tab,
ileum_metadata,
group, usage="ml_ra")
Removing 641 ASV(s)
Removing 104 ASV(s)
# fit the model
knn_model <- knn_binomial(filt_ileum_uni_data,
sample_method = "atypboot",
outcome="Group",
N=10,
reuse=FALSE,
file=model_name,
Q="Q1",
overfitting_check = TRUE)
# ROC curve
roc_c <- roc_curve(knn_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["knn_model_ra"]][[model_name]] <- knn_model$model_summary
supplements_models[["roc_cs"]][["knn_model_ra"]][[model_name]] <- knn_model$kfold_rocobjs
# see the results
knn_model$model_summary %>% t()
[,1]
k 30.0000000
auc 0.5622891
auc_optimism_corrected 0.4896604
auc_optimism_corrected_CIL 0.4452667
auc_optimism_corrected_CIU 0.5331828
accuracy 0.6445498
accuracy_optimism_corrected 0.5767476
accuracy_optimism_corrected_CIL 0.4900313
accuracy_optimism_corrected_CIU 0.6574726
roc_c
level="genus"
Aggregate taxa
genus_data <- aggregate_taxa(ileum_asv_tab,
ileum_taxa_tab,
taxonomic_level = level)
ileum_genus_tab <- genus_data[[1]]
ileum_genus_taxa_tab <- genus_data[[2]]
# ===== kNN, genus level, terminal ileum: pre_ltx vs healthy =====
pre_ltx vs healthy
group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)
# prepare the data
# Same pipeline as the ASV-level runs, but on the genus-aggregated table.
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
ileum_genus_taxa_tab,
ileum_metadata,
group,
usage="ml_ra")
Removing 84 ASV(s)
Removing 10 ASV(s)
# fit the model
knn_model <- knn_binomial(filt_ileum_uni_data,
sample_method = "atypboot",
outcome="Group",
N=10,
reuse=FALSE,
file=model_name,
Q="Q1",
overfitting_check = TRUE)
# ROC curve
roc_c <- roc_curve(knn_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["knn_model_ra"]][[model_name]] <- knn_model$model_summary
supplements_models[["roc_cs"]][["knn_model_ra"]][[model_name]] <- knn_model$kfold_rocobjs
# see the results
knn_model$model_summary %>% t()
[,1]
k 28.0000000
auc 0.5368885
auc_optimism_corrected 0.4642119
auc_optimism_corrected_CIL 0.3174013
auc_optimism_corrected_CIU 0.6018534
accuracy 0.5384615
accuracy_optimism_corrected 0.4715343
accuracy_optimism_corrected_CIL 0.3693182
accuracy_optimism_corrected_CIU 0.5633929
roc_c
# ===== kNN, genus level, terminal ileum: pre_ltx vs post_ltx =====
pre_ltx vs post_ltx
group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)
# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
ileum_genus_taxa_tab,
ileum_metadata,
group,
usage="ml_ra")
Removing 46 ASV(s)
Removing 6 ASV(s)
# fit the model
knn_model <- knn_binomial(filt_ileum_uni_data,
sample_method = "atypboot",
outcome="Group",
N=10,
reuse=FALSE,
file=model_name,
Q="Q1",
overfitting_check = TRUE)
# ROC curve
roc_c <- roc_curve(knn_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["knn_model_ra"]][[model_name]] <- knn_model$model_summary
supplements_models[["roc_cs"]][["knn_model_ra"]][[model_name]] <- knn_model$kfold_rocobjs
# see the results
knn_model$model_summary %>% t()
[,1]
k 30.0000000
auc 0.6008282
auc_optimism_corrected 0.5078505
auc_optimism_corrected_CIL 0.4423814
auc_optimism_corrected_CIU 0.5495103
accuracy 0.6682692
accuracy_optimism_corrected 0.6368878
accuracy_optimism_corrected_CIL 0.5878571
accuracy_optimism_corrected_CIU 0.6714029
roc_c
# ===== kNN, genus level, terminal ileum: post_ltx vs healthy =====
post_ltx vs healthy
group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)
# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
ileum_genus_taxa_tab,
ileum_metadata,
group,
usage="ml_ra")
Removing 45 ASV(s)
Removing 2 ASV(s)
# fit the model
knn_model <- knn_binomial(filt_ileum_uni_data,
sample_method = "atypboot",
outcome="Group",
N=10,
reuse=FALSE,
file=model_name,
Q="Q1",
overfitting_check = TRUE)
# ROC curve
roc_c <- roc_curve(knn_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["knn_model_ra"]][[model_name]] <- knn_model$model_summary
supplements_models[["roc_cs"]][["knn_model_ra"]][[model_name]] <- knn_model$kfold_rocobjs
# see the results
knn_model$model_summary %>% t()
[,1]
k 20.0000000
auc 0.6211535
auc_optimism_corrected 0.5026502
auc_optimism_corrected_CIL 0.4369054
auc_optimism_corrected_CIU 0.6005189
accuracy 0.6824645
accuracy_optimism_corrected 0.5846801
accuracy_optimism_corrected_CIL 0.4887763
accuracy_optimism_corrected_CIU 0.6809631
roc_c
# Back to ASV-level features for the random-forest runs.
level="ASV"
# ===== Random forest, ASV level, terminal ileum: pre_ltx vs healthy =====
pre_ltx vs healthy
group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)
# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
ileum_taxa_tab,
ileum_metadata,
group, usage="ml_ra")
Removing 1598 ASV(s)
Removing 146 ASV(s)
# fit the model
# rf_binomial() (custom_functions.R) -- tuned parameters below (mtry,
# splitrule, min.node.size) suggest a ranger/caret backend; TODO confirm.
rf_model <- rf_binomial(filt_ileum_uni_data,
sample_method = "atypboot",
outcome="Group",
N=10,
reuse=FALSE,
file=model_name,
Q="Q1",
overfitting_check = TRUE)
# ROC curve
roc_c <- roc_curve(rf_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["rf_model_ra"]][[model_name]] <- rf_model$model_summary
supplements_models[["roc_cs"]][["rf_model_ra"]][[model_name]] <- rf_model$kfold_rocobjs
# see the results
rf_model$model_summary %>% t()
[,1]
mtry "1"
splitrule "gini"
min.node.size "5"
auc "1"
auc_optimism_corrected "0.5669823"
auc_optimism_corrected_CIL "0.4528649"
auc_optimism_corrected_CIU "0.6767989"
accuracy "0.986014"
accuracy_optimism_corrected "0.5038023"
accuracy_optimism_corrected_CIL "0.4194299"
accuracy_optimism_corrected_CIU "0.6194728"
# NOTE(review): apparent AUC = 1 vs corrected AUC ~0.57 -- strong overfitting;
# the optimism-corrected figures are the ones to report.
roc_c
# ===== Random forest, ASV level, terminal ileum: pre_ltx vs post_ltx =====
pre_ltx vs post_ltx
group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)
# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
ileum_taxa_tab,
ileum_metadata,
group, usage="ml_ra")
Removing 979 ASV(s)
Removing 68 ASV(s)
# fit the model
rf_model <- rf_binomial(filt_ileum_uni_data,
sample_method = "atypboot",
outcome="Group",
N=10,
reuse=FALSE,
file=model_name,
Q="Q1",
overfitting_check = TRUE)
# ROC curve
roc_c <- roc_curve(rf_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["rf_model_ra"]][[model_name]] <- rf_model$model_summary
supplements_models[["roc_cs"]][["rf_model_ra"]][[model_name]] <- rf_model$kfold_rocobjs
# see the results
rf_model$model_summary %>% t()
[,1]
mtry "53"
splitrule "gini"
min.node.size "5"
auc "1"
auc_optimism_corrected "0.5767144"
auc_optimism_corrected_CIL "0.4718909"
auc_optimism_corrected_CIU "0.6804065"
accuracy "1"
accuracy_optimism_corrected "0.6315153"
accuracy_optimism_corrected_CIL "0.5730885"
accuracy_optimism_corrected_CIU "0.6743635"
roc_c
# ===== Random forest, ASV level, terminal ileum: post_ltx vs healthy =====
post_ltx vs healthy
group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)
# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
ileum_taxa_tab,
ileum_metadata,
group, usage="ml_ra")
Removing 641 ASV(s)
Removing 104 ASV(s)
# fit the model
rf_model <- rf_binomial(filt_ileum_uni_data,
sample_method = "atypboot",
outcome="Group",
N=10,
reuse=FALSE,
file=model_name,
Q="Q1",
overfitting_check = TRUE)
# ROC curve
roc_c <- roc_curve(rf_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["rf_model_ra"]][[model_name]] <- rf_model$model_summary
supplements_models[["roc_cs"]][["rf_model_ra"]][[model_name]] <- rf_model$kfold_rocobjs
# see the results
rf_model$model_summary %>% t()
[,1]
mtry "1"
splitrule "gini"
min.node.size "2"
auc "1"
auc_optimism_corrected "0.4144108"
auc_optimism_corrected_CIL "0.3644445"
auc_optimism_corrected_CIU "0.4922166"
accuracy "0.9478673"
accuracy_optimism_corrected "0.6377866"
accuracy_optimism_corrected_CIL "0.580098"
accuracy_optimism_corrected_CIU "0.7365317"
roc_c
# Genus-level aggregation for the random-forest runs.
# NOTE(review): this recomputes the same genus aggregation already produced
# earlier in the report (copy-paste duplication); the inputs are unchanged,
# so the result is identical -- could be computed once if this were refactored.
level="genus"
Aggregate taxa
genus_data <- aggregate_taxa(ileum_asv_tab,
ileum_taxa_tab,
taxonomic_level = level)
ileum_genus_tab <- genus_data[[1]]
ileum_genus_taxa_tab <- genus_data[[2]]
# ===== Random forest, genus level, terminal ileum: pre_ltx vs healthy =====
pre_ltx vs healthy
group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)
# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
ileum_genus_taxa_tab,
ileum_metadata,
group,
usage="ml_ra")
Removing 84 ASV(s)
Removing 10 ASV(s)
# fit the model
rf_model <- rf_binomial(filt_ileum_uni_data,
sample_method = "atypboot",
outcome="Group",
N=10,
reuse=FALSE,
file=model_name,
Q="Q1",
overfitting_check = TRUE)
# ROC curve
roc_c <- roc_curve(rf_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["rf_model_ra"]][[model_name]] <- rf_model$model_summary
supplements_models[["roc_cs"]][["rf_model_ra"]][[model_name]] <- rf_model$kfold_rocobjs
# see the results
rf_model$model_summary %>% t()
[,1]
mtry "117"
splitrule "gini"
min.node.size "2"
auc "1"
auc_optimism_corrected "0.4948943"
auc_optimism_corrected_CIL "0.4067949"
auc_optimism_corrected_CIU "0.5945465"
accuracy "1"
accuracy_optimism_corrected "0.4732102"
accuracy_optimism_corrected_CIL "0.3877458"
accuracy_optimism_corrected_CIU "0.5686154"
roc_c
# ===== Random forest, genus level, terminal ileum: pre_ltx vs post_ltx =====
pre_ltx vs post_ltx
group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)
# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
ileum_genus_taxa_tab,
ileum_metadata,
group,
usage="ml_ra")
Removing 46 ASV(s)
Removing 6 ASV(s)
# fit the model
rf_model <- rf_binomial(filt_ileum_uni_data,
sample_method = "atypboot",
outcome="Group",
N=10,
reuse=FALSE,
file=model_name,
Q="Q1",
overfitting_check = TRUE)
# ROC curve
roc_c <- roc_curve(rf_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["rf_model_ra"]][[model_name]] <- rf_model$model_summary
supplements_models[["roc_cs"]][["rf_model_ra"]][[model_name]] <- rf_model$kfold_rocobjs
# see the results
rf_model$model_summary %>% t()
[,1]
mtry "1"
splitrule "gini"
min.node.size "2"
auc "1"
auc_optimism_corrected "0.4660819"
auc_optimism_corrected_CIL "0.3747545"
auc_optimism_corrected_CIU "0.5351771"
accuracy "1"
accuracy_optimism_corrected "0.6339685"
accuracy_optimism_corrected_CIL "0.5722706"
accuracy_optimism_corrected_CIU "0.690873"
roc_c
# ===== Random forest, genus level, terminal ileum: post_ltx vs healthy =====
post_ltx vs healthy
group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)
# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
ileum_genus_taxa_tab,
ileum_metadata,
group,
usage="ml_ra")
Removing 45 ASV(s)
Removing 2 ASV(s)
# fit the model
rf_model <- rf_binomial(filt_ileum_uni_data,
sample_method = "atypboot",
outcome="Group",
N=10,
reuse=FALSE,
file=model_name,
Q="Q1",
overfitting_check = TRUE)
# ROC curve
roc_c <- roc_curve(rf_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["rf_model_ra"]][[model_name]] <- rf_model$model_summary
supplements_models[["roc_cs"]][["rf_model_ra"]][[model_name]] <- rf_model$kfold_rocobjs
# see the results
rf_model$model_summary %>% t()
[,1]
mtry "5"
splitrule "gini"
min.node.size "2"
auc "1"
auc_optimism_corrected "0.5034925"
auc_optimism_corrected_CIL "0.4477087"
auc_optimism_corrected_CIU "0.5660385"
accuracy "1"
accuracy_optimism_corrected "0.6141505"
accuracy_optimism_corrected_CIL "0.5210274"
accuracy_optimism_corrected_CIU "0.6779868"
roc_c
# Back to ASV-level features for the gradient-boosting runs.
level="ASV"
# ===== GBM, ASV level, terminal ileum: pre_ltx vs healthy =====
pre_ltx vs healthy
group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)
# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
ileum_taxa_tab,
ileum_metadata,
group, usage="ml_ra")
Removing 1598 ASV(s)
Removing 146 ASV(s)
# fit the model
# gbm_binomial() (custom_functions.R); the verbose training traces that
# follow ("Iter TrainDeviance ...") are printed by the underlying gbm fits,
# one trace per resample.
gbm_model <- gbm_binomial(filt_ileum_uni_data,
sample_method = "atypboot",
outcome="Group",
N=10,
reuse=FALSE,
file=model_name,
Q="Q1",
overfitting_check = TRUE)
Warning: There were missing values in resampled performance measures.Warning: missing values found in aggregated results
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.3533 nan 0.1000 0.0058
2 1.3347 nan 0.1000 -0.0019
3 1.3052 nan 0.1000 0.0004
4 1.2941 nan 0.1000 0.0000
5 1.2766 nan 0.1000 -0.0028
6 1.2466 nan 0.1000 0.0006
7 1.2367 nan 0.1000 -0.0062
8 1.2263 nan 0.1000 -0.0067
9 1.2048 nan 0.1000 -0.0007
10 1.1903 nan 0.1000 -0.0076
20 1.0878 nan 0.1000 -0.0024
40 0.8923 nan 0.1000 0.0032
60 0.7288 nan 0.1000 -0.0007
80 0.6337 nan 0.1000 -0.0060
100 0.5305 nan 0.1000 -0.0022
Using 100 trees...
Warning: There were missing values in resampled performance measures.Warning: missing values found in aggregated results
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.2974 nan 0.1000 0.0277
2 1.2320 nan 0.1000 0.0161
3 1.1742 nan 0.1000 0.0104
4 1.1268 nan 0.1000 0.0106
5 1.0589 nan 0.1000 0.0211
6 1.0188 nan 0.1000 0.0037
7 0.9733 nan 0.1000 0.0118
8 0.9389 nan 0.1000 0.0028
9 0.9051 nan 0.1000 0.0025
10 0.8686 nan 0.1000 0.0106
20 0.6000 nan 0.1000 -0.0004
40 0.3107 nan 0.1000 0.0021
60 0.1756 nan 0.1000 0.0027
80 0.1085 nan 0.1000 -0.0006
100 0.0654 nan 0.1000 -0.0002
Using 100 trees...
Using 100 trees...
Warning: There were missing values in resampled performance measures.Warning: missing values found in aggregated results
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.3052 nan 0.1000 0.0301
2 1.2430 nan 0.1000 0.0120
3 1.1875 nan 0.1000 0.0103
4 1.1328 nan 0.1000 0.0157
5 1.0862 nan 0.1000 0.0024
6 1.0348 nan 0.1000 0.0195
7 0.9840 nan 0.1000 0.0122
8 0.9284 nan 0.1000 0.0166
9 0.8894 nan 0.1000 0.0047
10 0.8474 nan 0.1000 0.0022
20 0.5772 nan 0.1000 0.0047
40 0.2837 nan 0.1000 -0.0000
60 0.1482 nan 0.1000 0.0009
80 0.0872 nan 0.1000 0.0006
100 0.0522 nan 0.1000 -0.0005
120 0.0289 nan 0.1000 -0.0002
140 0.0170 nan 0.1000 -0.0002
160 0.0108 nan 0.1000 -0.0001
180 0.0060 nan 0.1000 -0.0000
200 0.0043 nan 0.1000 -0.0001
220 0.0024 nan 0.1000 -0.0000
240 0.0015 nan 0.1000 -0.0000
260 0.0009 nan 0.1000 -0.0000
280 0.0006 nan 0.1000 0.0000
300 0.0004 nan 0.1000 -0.0000
320 0.0003 nan 0.1000 -0.0000
340 0.0001 nan 0.1000 -0.0000
360 0.0001 nan 0.1000 -0.0000
380 0.0000 nan 0.1000 -0.0000
400 0.0000 nan 0.1000 -0.0000
420 0.0000 nan 0.1000 0.0000
440 0.0000 nan 0.1000 0.0000
460 0.0000 nan 0.1000 0.0000
480 0.0000 nan 0.1000 0.0000
500 0.0000 nan 0.1000 -0.0000
Using 100 trees...
Using 100 trees...
Warning: There were missing values in resampled performance measures.Warning: missing values found in aggregated results
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.3514 nan 0.1000 0.0014
2 1.3139 nan 0.1000 0.0129
3 1.2839 nan 0.1000 0.0019
4 1.2566 nan 0.1000 0.0044
5 1.2399 nan 0.1000 -0.0039
6 1.2262 nan 0.1000 -0.0076
7 1.2055 nan 0.1000 0.0056
8 1.1860 nan 0.1000 -0.0025
9 1.1659 nan 0.1000 -0.0040
10 1.1509 nan 0.1000 0.0032
20 0.9806 nan 0.1000 -0.0013
40 0.7090 nan 0.1000 0.0020
60 0.5496 nan 0.1000 -0.0035
80 0.4208 nan 0.1000 -0.0010
100 0.3375 nan 0.1000 -0.0030
120 0.2756 nan 0.1000 -0.0010
140 0.2228 nan 0.1000 -0.0009
160 0.1729 nan 0.1000 0.0001
180 0.1391 nan 0.1000 0.0001
200 0.1110 nan 0.1000 -0.0006
220 0.0895 nan 0.1000 -0.0005
240 0.0729 nan 0.1000 -0.0005
260 0.0584 nan 0.1000 -0.0002
280 0.0470 nan 0.1000 -0.0003
300 0.0393 nan 0.1000 -0.0002
320 0.0317 nan 0.1000 -0.0001
340 0.0265 nan 0.1000 -0.0001
360 0.0213 nan 0.1000 -0.0002
380 0.0173 nan 0.1000 -0.0001
400 0.0146 nan 0.1000 -0.0001
420 0.0117 nan 0.1000 -0.0000
440 0.0093 nan 0.1000 0.0000
460 0.0074 nan 0.1000 0.0000
480 0.0061 nan 0.1000 -0.0000
500 0.0050 nan 0.1000 -0.0000
Using 100 trees...
Using 100 trees...
Warning: There were missing values in resampled performance measures.Warning: missing values found in aggregated results
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.3449 nan 0.1000 0.0155
2 1.3084 nan 0.1000 0.0123
3 1.2811 nan 0.1000 0.0020
4 1.2529 nan 0.1000 0.0087
5 1.2213 nan 0.1000 0.0077
6 1.2003 nan 0.1000 -0.0032
7 1.1714 nan 0.1000 0.0078
8 1.1472 nan 0.1000 0.0006
9 1.1230 nan 0.1000 0.0005
10 1.1041 nan 0.1000 0.0011
20 0.9241 nan 0.1000 -0.0046
40 0.6846 nan 0.1000 -0.0005
60 0.5336 nan 0.1000 -0.0028
80 0.4341 nan 0.1000 -0.0034
100 0.3490 nan 0.1000 -0.0019
120 0.2752 nan 0.1000 -0.0018
140 0.2161 nan 0.1000 -0.0014
160 0.1735 nan 0.1000 -0.0019
180 0.1393 nan 0.1000 -0.0005
200 0.1118 nan 0.1000 -0.0011
220 0.0876 nan 0.1000 -0.0002
240 0.0735 nan 0.1000 0.0003
260 0.0604 nan 0.1000 -0.0001
280 0.0498 nan 0.1000 -0.0004
300 0.0403 nan 0.1000 -0.0001
320 0.0337 nan 0.1000 -0.0002
340 0.0280 nan 0.1000 -0.0001
360 0.0240 nan 0.1000 -0.0001
380 0.0192 nan 0.1000 -0.0000
400 0.0155 nan 0.1000 -0.0000
420 0.0125 nan 0.1000 0.0000
440 0.0103 nan 0.1000 -0.0000
460 0.0083 nan 0.1000 -0.0001
480 0.0069 nan 0.1000 -0.0001
500 0.0056 nan 0.1000 -0.0000
Using 100 trees...
Using 100 trees...
Warning: There were missing values in resampled performance measures.Warning: missing values found in aggregated results
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.3785 nan 0.1000 -0.0101
2 1.3525 nan 0.1000 0.0066
3 1.3225 nan 0.1000 0.0040
4 1.3096 nan 0.1000 -0.0071
5 1.2884 nan 0.1000 0.0039
6 1.2634 nan 0.1000 0.0008
7 1.2478 nan 0.1000 0.0029
8 1.2294 nan 0.1000 0.0008
9 1.2124 nan 0.1000 0.0063
10 1.1999 nan 0.1000 0.0007
20 1.1019 nan 0.1000 0.0011
40 0.9031 nan 0.1000 -0.0043
60 0.7603 nan 0.1000 -0.0018
80 0.6568 nan 0.1000 -0.0038
100 0.5726 nan 0.1000 -0.0010
120 0.4990 nan 0.1000 -0.0015
140 0.4378 nan 0.1000 -0.0039
160 0.3836 nan 0.1000 0.0002
180 0.3437 nan 0.1000 -0.0002
200 0.3042 nan 0.1000 -0.0025
220 0.2686 nan 0.1000 -0.0003
240 0.2446 nan 0.1000 -0.0008
260 0.2194 nan 0.1000 -0.0012
280 0.1974 nan 0.1000 -0.0005
300 0.1737 nan 0.1000 -0.0001
320 0.1569 nan 0.1000 -0.0009
340 0.1404 nan 0.1000 -0.0006
360 0.1273 nan 0.1000 -0.0008
380 0.1141 nan 0.1000 -0.0006
400 0.1031 nan 0.1000 -0.0012
420 0.0942 nan 0.1000 0.0001
440 0.0858 nan 0.1000 -0.0005
460 0.0798 nan 0.1000 -0.0007
480 0.0722 nan 0.1000 -0.0002
500 0.0652 nan 0.1000 -0.0002
Using 100 trees...
Using 100 trees...
Warning: There were missing values in resampled performance measures.Warning: missing values found in aggregated results
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.3569 nan 0.1000 0.0004
2 1.3305 nan 0.1000 0.0067
3 1.3074 nan 0.1000 0.0030
4 1.2791 nan 0.1000 0.0037
5 1.2596 nan 0.1000 -0.0013
6 1.2436 nan 0.1000 0.0001
7 1.2257 nan 0.1000 -0.0016
8 1.2055 nan 0.1000 -0.0011
9 1.1887 nan 0.1000 -0.0003
10 1.1741 nan 0.1000 -0.0005
20 1.0208 nan 0.1000 0.0004
40 0.8133 nan 0.1000 -0.0006
60 0.6542 nan 0.1000 -0.0021
80 0.5379 nan 0.1000 -0.0023
100 0.4599 nan 0.1000 -0.0031
Using 100 trees...
Using 100 trees...
Warning: There were missing values in resampled performance measures.Warning: missing values found in aggregated results
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.3297 nan 0.1000 0.0049
2 1.2817 nan 0.1000 0.0012
3 1.2242 nan 0.1000 0.0211
4 1.1878 nan 0.1000 0.0012
5 1.1395 nan 0.1000 0.0111
6 1.0954 nan 0.1000 0.0051
7 1.0600 nan 0.1000 0.0044
8 1.0161 nan 0.1000 0.0089
9 0.9887 nan 0.1000 -0.0012
10 0.9557 nan 0.1000 -0.0003
20 0.7187 nan 0.1000 0.0024
40 0.3925 nan 0.1000 0.0012
60 0.2487 nan 0.1000 -0.0023
80 0.1567 nan 0.1000 0.0011
100 0.1039 nan 0.1000 -0.0014
120 0.0700 nan 0.1000 -0.0005
140 0.0451 nan 0.1000 -0.0003
160 0.0299 nan 0.1000 -0.0004
180 0.0208 nan 0.1000 -0.0002
200 0.0143 nan 0.1000 -0.0000
Using 100 trees...
Using 100 trees...
Warning: There were missing values in resampled performance measures.Warning: missing values found in aggregated results
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.3302 nan 0.1000 0.0216
2 1.2812 nan 0.1000 0.0144
3 1.2430 nan 0.1000 0.0089
4 1.2285 nan 0.1000 -0.0018
5 1.1963 nan 0.1000 -0.0031
6 1.1609 nan 0.1000 0.0171
7 1.1311 nan 0.1000 -0.0032
8 1.1103 nan 0.1000 0.0085
9 1.0765 nan 0.1000 0.0101
10 1.0462 nan 0.1000 0.0099
20 0.8354 nan 0.1000 0.0029
40 0.6035 nan 0.1000 0.0021
60 0.4410 nan 0.1000 -0.0016
80 0.3224 nan 0.1000 0.0007
100 0.2380 nan 0.1000 -0.0000
120 0.1809 nan 0.1000 -0.0007
140 0.1344 nan 0.1000 -0.0001
160 0.1034 nan 0.1000 0.0001
180 0.0778 nan 0.1000 0.0002
200 0.0593 nan 0.1000 0.0002
220 0.0468 nan 0.1000 -0.0005
240 0.0365 nan 0.1000 -0.0003
260 0.0282 nan 0.1000 -0.0000
280 0.0218 nan 0.1000 -0.0001
300 0.0178 nan 0.1000 -0.0001
320 0.0139 nan 0.1000 -0.0000
340 0.0110 nan 0.1000 0.0000
360 0.0085 nan 0.1000 -0.0000
380 0.0070 nan 0.1000 -0.0001
400 0.0055 nan 0.1000 -0.0000
420 0.0043 nan 0.1000 -0.0000
440 0.0034 nan 0.1000 -0.0000
460 0.0028 nan 0.1000 -0.0000
480 0.0022 nan 0.1000 0.0000
500 0.0017 nan 0.1000 -0.0000
Using 100 trees...
Using 100 trees...
Warning: There were missing values in resampled performance measures.Warning: missing values found in aggregated results
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.3385 nan 0.1000 0.0080
2 1.2785 nan 0.1000 0.0276
3 1.2533 nan 0.1000 0.0061
4 1.2210 nan 0.1000 0.0104
5 1.1870 nan 0.1000 0.0050
6 1.1582 nan 0.1000 0.0028
7 1.1284 nan 0.1000 0.0075
8 1.0950 nan 0.1000 0.0128
9 1.0610 nan 0.1000 0.0113
10 1.0429 nan 0.1000 -0.0027
20 0.8268 nan 0.1000 -0.0000
40 0.5818 nan 0.1000 0.0013
60 0.4222 nan 0.1000 -0.0007
80 0.3115 nan 0.1000 0.0005
100 0.2286 nan 0.1000 -0.0004
120 0.1760 nan 0.1000 -0.0011
140 0.1366 nan 0.1000 -0.0021
160 0.1061 nan 0.1000 -0.0003
180 0.0832 nan 0.1000 -0.0003
200 0.0595 nan 0.1000 0.0004
220 0.0462 nan 0.1000 -0.0002
240 0.0355 nan 0.1000 0.0002
260 0.0269 nan 0.1000 -0.0002
280 0.0212 nan 0.1000 -0.0003
300 0.0164 nan 0.1000 -0.0001
320 0.0129 nan 0.1000 -0.0001
340 0.0098 nan 0.1000 0.0000
360 0.0073 nan 0.1000 0.0000
380 0.0056 nan 0.1000 -0.0001
400 0.0043 nan 0.1000 -0.0000
420 0.0034 nan 0.1000 -0.0000
440 0.0026 nan 0.1000 0.0000
460 0.0020 nan 0.1000 0.0000
480 0.0016 nan 0.1000 -0.0000
500 0.0013 nan 0.1000 -0.0000
Using 100 trees...
Using 100 trees...
Warning: There were missing values in resampled performance measures.Warning: missing values found in aggregated results
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.3458 nan 0.1000 0.0100
2 1.3139 nan 0.1000 0.0115
3 1.2862 nan 0.1000 0.0069
4 1.2691 nan 0.1000 -0.0016
5 1.2508 nan 0.1000 0.0026
6 1.2318 nan 0.1000 0.0034
7 1.2056 nan 0.1000 0.0082
8 1.1827 nan 0.1000 0.0072
9 1.1676 nan 0.1000 -0.0003
10 1.1505 nan 0.1000 0.0025
20 1.0061 nan 0.1000 -0.0010
40 0.8021 nan 0.1000 0.0001
60 0.6640 nan 0.1000 -0.0012
80 0.5488 nan 0.1000 -0.0037
100 0.4591 nan 0.1000 -0.0004
120 0.3882 nan 0.1000 -0.0012
140 0.3312 nan 0.1000 -0.0001
160 0.2867 nan 0.1000 -0.0014
180 0.2477 nan 0.1000 -0.0006
200 0.2135 nan 0.1000 -0.0003
Using 100 trees...
Using 100 trees...
# ROC curve
roc_c <- roc_curve(gbm_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
# Store GBM summary and k-fold ROC objects under the "gbm_model_ra" slot.
supplements_models[["models_summ"]][["gbm_model_ra"]][[model_name]] <- gbm_model$model_summary
supplements_models[["roc_cs"]][["gbm_model_ra"]][[model_name]] <- gbm_model$kfold_rocobjs
# see the results
gbm_model$model_summary %>% t()
[,1]
n.trees 100.0000000
interaction.depth 5.0000000
shrinkage 0.1000000
n.minobsinnode 20.0000000
auc 0.9972603
auc_optimism_corrected 0.4789823
auc_optimism_corrected_CIL 0.3795586
auc_optimism_corrected_CIU 0.5960000
accuracy 0.9650350
accuracy_optimism_corrected 0.4911041
accuracy_optimism_corrected_CIL 0.3543776
accuracy_optimism_corrected_CIU 0.6332143
# NOTE(review): corrected AUC ~0.48 -- the near-perfect apparent AUC is
# entirely optimism (overfitting).
roc_c
# ===== GBM, ASV level, terminal ileum: pre_ltx vs post_ltx =====
pre_ltx vs post_ltx
group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)
# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
ileum_taxa_tab,
ileum_metadata,
group, usage="ml_ra")
Removing 979 ASV(s)
Removing 68 ASV(s)
# fit the model
# Verbose per-resample gbm training traces follow this call.
gbm_model <- gbm_binomial(filt_ileum_uni_data,
sample_method = "atypboot",
outcome="Group",
N=10,
reuse=FALSE,
file=model_name,
Q="Q1",
overfitting_check = TRUE)
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.2621 nan 0.1000 -0.0122
2 1.2456 nan 0.1000 -0.0109
3 1.2254 nan 0.1000 -0.0034
4 1.2096 nan 0.1000 -0.0025
5 1.1932 nan 0.1000 0.0011
6 1.1820 nan 0.1000 -0.0055
7 1.1580 nan 0.1000 0.0031
8 1.1375 nan 0.1000 -0.0031
9 1.1223 nan 0.1000 -0.0032
10 1.1132 nan 0.1000 -0.0056
20 0.9729 nan 0.1000 -0.0013
40 0.7620 nan 0.1000 -0.0041
60 0.6196 nan 0.1000 -0.0020
80 0.5009 nan 0.1000 -0.0024
100 0.4146 nan 0.1000 -0.0019
120 0.3443 nan 0.1000 -0.0009
140 0.2907 nan 0.1000 -0.0031
160 0.2413 nan 0.1000 -0.0010
180 0.2046 nan 0.1000 -0.0014
200 0.1719 nan 0.1000 -0.0007
Using 200 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.2789 nan 0.1000 0.0134
2 1.2141 nan 0.1000 0.0203
3 1.1592 nan 0.1000 0.0128
4 1.1180 nan 0.1000 0.0081
5 1.0656 nan 0.1000 0.0110
6 1.0227 nan 0.1000 0.0075
7 0.9772 nan 0.1000 0.0080
8 0.9392 nan 0.1000 0.0049
9 0.8910 nan 0.1000 0.0069
10 0.8583 nan 0.1000 0.0087
20 0.5976 nan 0.1000 0.0048
40 0.3213 nan 0.1000 -0.0003
60 0.1827 nan 0.1000 -0.0004
80 0.1117 nan 0.1000 -0.0004
100 0.0699 nan 0.1000 -0.0003
Using 200 trees...
Using 200 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.2006 nan 0.1000 0.0130
2 1.1450 nan 0.1000 0.0115
3 1.0920 nan 0.1000 0.0101
4 1.0470 nan 0.1000 0.0116
5 1.0115 nan 0.1000 0.0063
6 0.9677 nan 0.1000 0.0096
7 0.9170 nan 0.1000 0.0151
8 0.8679 nan 0.1000 0.0092
9 0.8384 nan 0.1000 0.0069
10 0.8063 nan 0.1000 0.0073
20 0.5714 nan 0.1000 -0.0007
40 0.3113 nan 0.1000 0.0005
60 0.1754 nan 0.1000 -0.0007
80 0.1000 nan 0.1000 0.0005
100 0.0588 nan 0.1000 -0.0004
120 0.0362 nan 0.1000 -0.0004
140 0.0228 nan 0.1000 0.0000
160 0.0153 nan 0.1000 -0.0001
180 0.0095 nan 0.1000 -0.0000
200 0.0060 nan 0.1000 -0.0000
Using 200 trees...
Using 200 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.1892 nan 0.1000 0.0211
2 1.1535 nan 0.1000 0.0135
3 1.1220 nan 0.1000 0.0071
4 1.0860 nan 0.1000 0.0063
5 1.0552 nan 0.1000 0.0047
6 1.0282 nan 0.1000 -0.0002
7 1.0073 nan 0.1000 0.0031
8 0.9817 nan 0.1000 0.0024
9 0.9594 nan 0.1000 0.0021
10 0.9407 nan 0.1000 -0.0030
20 0.7490 nan 0.1000 0.0007
40 0.5078 nan 0.1000 -0.0054
60 0.3657 nan 0.1000 -0.0005
80 0.2773 nan 0.1000 -0.0032
100 0.2006 nan 0.1000 -0.0023
Using 200 trees...
Using 200 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.2291 nan 0.1000 0.0087
2 1.2015 nan 0.1000 -0.0028
3 1.1820 nan 0.1000 0.0010
4 1.1519 nan 0.1000 0.0022
5 1.1221 nan 0.1000 0.0038
6 1.0823 nan 0.1000 0.0091
7 1.0476 nan 0.1000 0.0049
8 1.0143 nan 0.1000 0.0087
9 0.9958 nan 0.1000 -0.0022
10 0.9776 nan 0.1000 -0.0036
20 0.7769 nan 0.1000 0.0009
40 0.5318 nan 0.1000 -0.0012
60 0.3965 nan 0.1000 -0.0018
80 0.2856 nan 0.1000 -0.0017
100 0.2157 nan 0.1000 -0.0003
120 0.1627 nan 0.1000 -0.0013
140 0.1245 nan 0.1000 -0.0011
160 0.0899 nan 0.1000 -0.0002
180 0.0676 nan 0.1000 -0.0005
200 0.0531 nan 0.1000 -0.0002
220 0.0401 nan 0.1000 -0.0005
240 0.0305 nan 0.1000 -0.0003
260 0.0234 nan 0.1000 0.0000
280 0.0182 nan 0.1000 -0.0001
300 0.0146 nan 0.1000 -0.0000
320 0.0111 nan 0.1000 0.0000
340 0.0087 nan 0.1000 -0.0000
360 0.0068 nan 0.1000 -0.0000
380 0.0055 nan 0.1000 -0.0000
400 0.0045 nan 0.1000 -0.0000
420 0.0035 nan 0.1000 -0.0000
440 0.0028 nan 0.1000 -0.0000
460 0.0021 nan 0.1000 -0.0000
480 0.0018 nan 0.1000 -0.0000
500 0.0014 nan 0.1000 -0.0000
Using 200 trees...
Using 200 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.2201 nan 0.1000 0.0062
2 1.1822 nan 0.1000 0.0076
3 1.1435 nan 0.1000 0.0052
4 1.1069 nan 0.1000 0.0073
5 1.0787 nan 0.1000 0.0015
6 1.0585 nan 0.1000 0.0042
7 1.0228 nan 0.1000 0.0100
8 0.9960 nan 0.1000 0.0048
9 0.9782 nan 0.1000 0.0002
10 0.9461 nan 0.1000 0.0060
20 0.7507 nan 0.1000 0.0017
40 0.5098 nan 0.1000 -0.0012
60 0.3695 nan 0.1000 -0.0013
80 0.2737 nan 0.1000 -0.0026
100 0.2027 nan 0.1000 -0.0006
Using 200 trees...
Using 200 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.2650 nan 0.1000 0.0237
2 1.2163 nan 0.1000 0.0144
3 1.1831 nan 0.1000 0.0097
4 1.1308 nan 0.1000 0.0175
5 1.0983 nan 0.1000 0.0028
6 1.0703 nan 0.1000 0.0051
7 1.0463 nan 0.1000 0.0005
8 1.0188 nan 0.1000 0.0010
9 0.9927 nan 0.1000 0.0078
10 0.9692 nan 0.1000 0.0044
20 0.7832 nan 0.1000 0.0013
40 0.5331 nan 0.1000 0.0044
60 0.3829 nan 0.1000 -0.0029
80 0.2804 nan 0.1000 0.0014
100 0.2042 nan 0.1000 -0.0008
120 0.1447 nan 0.1000 -0.0007
140 0.1132 nan 0.1000 -0.0008
160 0.0839 nan 0.1000 0.0005
180 0.0659 nan 0.1000 -0.0001
200 0.0505 nan 0.1000 -0.0003
Using 200 trees...
Using 200 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.1795 nan 0.1000 0.0186
2 1.1316 nan 0.1000 0.0097
3 1.0966 nan 0.1000 0.0071
4 1.0708 nan 0.1000 -0.0028
5 1.0397 nan 0.1000 0.0054
6 1.0061 nan 0.1000 0.0041
7 0.9640 nan 0.1000 0.0122
8 0.9357 nan 0.1000 0.0028
9 0.9171 nan 0.1000 -0.0031
10 0.8973 nan 0.1000 -0.0005
20 0.6967 nan 0.1000 0.0048
40 0.4700 nan 0.1000 -0.0001
60 0.3320 nan 0.1000 -0.0029
80 0.2346 nan 0.1000 -0.0003
100 0.1738 nan 0.1000 0.0001
Using 200 trees...
Using 200 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.3153 nan 0.1000 0.0093
2 1.2692 nan 0.1000 0.0107
3 1.2249 nan 0.1000 0.0166
4 1.1934 nan 0.1000 0.0088
5 1.1757 nan 0.1000 -0.0102
6 1.1500 nan 0.1000 -0.0041
7 1.1073 nan 0.1000 0.0128
8 1.0792 nan 0.1000 0.0098
9 1.0542 nan 0.1000 -0.0009
10 1.0340 nan 0.1000 0.0035
20 0.8014 nan 0.1000 -0.0022
40 0.5252 nan 0.1000 -0.0010
60 0.3575 nan 0.1000 0.0013
80 0.2668 nan 0.1000 -0.0008
100 0.1932 nan 0.1000 -0.0005
Using 200 trees...
Using 200 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.2577 nan 0.1000 0.0195
2 1.1869 nan 0.1000 0.0164
3 1.1256 nan 0.1000 0.0082
4 1.0826 nan 0.1000 0.0072
5 1.0341 nan 0.1000 0.0123
6 0.9879 nan 0.1000 0.0125
7 0.9364 nan 0.1000 0.0173
8 0.8988 nan 0.1000 0.0112
9 0.8510 nan 0.1000 0.0174
10 0.8254 nan 0.1000 0.0019
20 0.5779 nan 0.1000 -0.0050
40 0.2976 nan 0.1000 -0.0023
60 0.1648 nan 0.1000 0.0003
80 0.0977 nan 0.1000 -0.0008
100 0.0615 nan 0.1000 0.0002
120 0.0375 nan 0.1000 -0.0002
140 0.0222 nan 0.1000 -0.0000
160 0.0141 nan 0.1000 0.0000
180 0.0086 nan 0.1000 -0.0000
200 0.0054 nan 0.1000 -0.0001
220 0.0036 nan 0.1000 -0.0000
240 0.0023 nan 0.1000 -0.0000
260 0.0015 nan 0.1000 -0.0000
280 0.0010 nan 0.1000 -0.0000
300 0.0006 nan 0.1000 -0.0000
320 0.0004 nan 0.1000 0.0000
340 0.0003 nan 0.1000 -0.0000
360 0.0002 nan 0.1000 -0.0000
380 0.0001 nan 0.1000 -0.0000
400 0.0001 nan 0.1000 -0.0000
420 0.0000 nan 0.1000 -0.0000
440 0.0000 nan 0.1000 -0.0000
460 0.0000 nan 0.1000 0.0000
480 0.0000 nan 0.1000 -0.0000
500 0.0000 nan 0.1000 -0.0000
Using 200 trees...
Using 200 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.2945 nan 0.1000 0.0110
2 1.2398 nan 0.1000 0.0072
3 1.1851 nan 0.1000 0.0071
4 1.1469 nan 0.1000 0.0031
5 1.0992 nan 0.1000 0.0088
6 1.0472 nan 0.1000 0.0150
7 0.9976 nan 0.1000 0.0122
8 0.9698 nan 0.1000 -0.0006
9 0.9335 nan 0.1000 0.0094
10 0.9077 nan 0.1000 0.0004
20 0.6311 nan 0.1000 0.0036
40 0.3470 nan 0.1000 -0.0005
60 0.2013 nan 0.1000 -0.0004
80 0.1212 nan 0.1000 -0.0005
100 0.0717 nan 0.1000 -0.0001
Using 200 trees...
Using 200 trees...
# ROC curve
roc_c <- roc_curve(gbm_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["gbm_model_ra"]][[model_name]] <- gbm_model$model_summary
supplements_models[["roc_cs"]][["gbm_model_ra"]][[model_name]] <- gbm_model$kfold_rocobjs
# see the results
gbm_model$model_summary %>% t()
[,1]
n.trees 200.0000000
interaction.depth 5.0000000
shrinkage 0.1000000
n.minobsinnode 20.0000000
auc 1.0000000
auc_optimism_corrected 0.4843010
auc_optimism_corrected_CIL 0.4102344
auc_optimism_corrected_CIU 0.5552723
accuracy 1.0000000
accuracy_optimism_corrected 0.6023346
accuracy_optimism_corrected_CIL 0.5617955
accuracy_optimism_corrected_CIU 0.6582415
# NOTE(review): apparent AUC = 1 vs corrected ~0.48 -- severe overfitting.
roc_c
# ===== GBM, ASV level, terminal ileum: post_ltx vs healthy =====
post_ltx vs healthy
group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)
# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
ileum_taxa_tab,
ileum_metadata,
group, usage="ml_ra")
Removing 641 ASV(s)
Removing 104 ASV(s)
# fit the model
# Verbose per-resample gbm training traces follow this call.
gbm_model <- gbm_binomial(filt_ileum_uni_data,
sample_method = "atypboot",
outcome="Group",
N=10,
reuse=FALSE,
file=model_name,
Q="Q1",
overfitting_check = TRUE)
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.2802 nan 0.1000 -0.0038
2 1.2656 nan 0.1000 -0.0004
3 1.2504 nan 0.1000 -0.0036
4 1.2418 nan 0.1000 -0.0054
5 1.2333 nan 0.1000 0.0015
6 1.2195 nan 0.1000 0.0002
7 1.2067 nan 0.1000 -0.0041
8 1.2007 nan 0.1000 -0.0014
9 1.1822 nan 0.1000 0.0020
10 1.1701 nan 0.1000 -0.0036
20 1.0716 nan 0.1000 -0.0035
40 0.9348 nan 0.1000 -0.0014
60 0.8116 nan 0.1000 -0.0024
80 0.7218 nan 0.1000 -0.0003
100 0.6460 nan 0.1000 -0.0026
Using 100 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.2611 nan 0.1000 0.0135
2 1.2033 nan 0.1000 0.0199
3 1.1760 nan 0.1000 0.0010
4 1.1491 nan 0.1000 0.0044
5 1.1114 nan 0.1000 0.0062
6 1.0852 nan 0.1000 -0.0017
7 1.0608 nan 0.1000 -0.0025
8 1.0278 nan 0.1000 0.0080
9 1.0016 nan 0.1000 0.0051
10 0.9760 nan 0.1000 0.0012
20 0.7579 nan 0.1000 -0.0002
40 0.4971 nan 0.1000 0.0023
60 0.3365 nan 0.1000 0.0021
80 0.2410 nan 0.1000 -0.0011
100 0.1690 nan 0.1000 -0.0009
120 0.1255 nan 0.1000 -0.0001
140 0.0891 nan 0.1000 -0.0006
160 0.0639 nan 0.1000 -0.0003
180 0.0453 nan 0.1000 -0.0000
200 0.0328 nan 0.1000 0.0000
220 0.0234 nan 0.1000 0.0000
240 0.0169 nan 0.1000 -0.0000
260 0.0120 nan 0.1000 -0.0001
280 0.0085 nan 0.1000 -0.0001
300 0.0064 nan 0.1000 0.0000
320 0.0046 nan 0.1000 -0.0000
340 0.0033 nan 0.1000 -0.0000
360 0.0025 nan 0.1000 -0.0000
380 0.0018 nan 0.1000 0.0000
400 0.0014 nan 0.1000 -0.0000
420 0.0010 nan 0.1000 -0.0000
440 0.0007 nan 0.1000 -0.0000
460 0.0005 nan 0.1000 0.0000
480 0.0004 nan 0.1000 -0.0000
500 0.0003 nan 0.1000 0.0000
Using 100 trees...
Using 100 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.2786 nan 0.1000 -0.0004
2 1.2486 nan 0.1000 0.0017
3 1.1950 nan 0.1000 0.0165
4 1.1712 nan 0.1000 -0.0010
5 1.1434 nan 0.1000 0.0006
6 1.1140 nan 0.1000 0.0037
7 1.0856 nan 0.1000 0.0048
8 1.0511 nan 0.1000 0.0088
9 1.0171 nan 0.1000 0.0065
10 0.9916 nan 0.1000 0.0031
20 0.8218 nan 0.1000 -0.0006
40 0.5662 nan 0.1000 -0.0061
60 0.4177 nan 0.1000 -0.0015
80 0.3205 nan 0.1000 -0.0010
100 0.2393 nan 0.1000 -0.0010
Using 100 trees...
Using 100 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.1971 nan 0.1000 0.0227
2 1.1392 nan 0.1000 0.0137
3 1.0825 nan 0.1000 0.0059
4 1.0138 nan 0.1000 0.0160
5 0.9737 nan 0.1000 0.0075
6 0.9314 nan 0.1000 0.0107
7 0.8917 nan 0.1000 0.0064
8 0.8446 nan 0.1000 0.0079
9 0.8119 nan 0.1000 0.0014
10 0.7804 nan 0.1000 0.0038
20 0.5257 nan 0.1000 -0.0012
40 0.2603 nan 0.1000 0.0004
60 0.1450 nan 0.1000 -0.0006
80 0.0858 nan 0.1000 -0.0003
100 0.0490 nan 0.1000 0.0000
120 0.0297 nan 0.1000 -0.0002
140 0.0175 nan 0.1000 -0.0000
160 0.0105 nan 0.1000 -0.0000
180 0.0067 nan 0.1000 -0.0001
200 0.0040 nan 0.1000 -0.0000
220 0.0023 nan 0.1000 -0.0000
240 0.0014 nan 0.1000 -0.0000
260 0.0009 nan 0.1000 -0.0000
280 0.0005 nan 0.1000 0.0000
300 0.0004 nan 0.1000 -0.0000
320 0.0002 nan 0.1000 0.0000
340 0.0001 nan 0.1000 0.0000
360 0.0001 nan 0.1000 0.0000
380 0.0001 nan 0.1000 -0.0000
400 0.0000 nan 0.1000 -0.0000
420 0.0000 nan 0.1000 -0.0000
440 0.0000 nan 0.1000 -0.0000
460 0.0000 nan 0.1000 -0.0000
480 0.0000 nan 0.1000 0.0000
500 0.0000 nan 0.1000 -0.0000
Using 100 trees...
Using 100 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.2427 nan 0.1000 0.0074
2 1.2104 nan 0.1000 -0.0016
3 1.1724 nan 0.1000 0.0093
4 1.1441 nan 0.1000 0.0013
5 1.1185 nan 0.1000 0.0048
6 1.0735 nan 0.1000 0.0106
7 1.0498 nan 0.1000 0.0040
8 1.0162 nan 0.1000 0.0075
9 0.9945 nan 0.1000 0.0001
10 0.9804 nan 0.1000 -0.0054
20 0.7761 nan 0.1000 0.0020
40 0.5270 nan 0.1000 -0.0030
60 0.3642 nan 0.1000 -0.0014
80 0.2593 nan 0.1000 0.0003
100 0.1935 nan 0.1000 -0.0013
120 0.1443 nan 0.1000 -0.0008
140 0.1065 nan 0.1000 -0.0001
160 0.0827 nan 0.1000 -0.0004
180 0.0604 nan 0.1000 -0.0000
200 0.0461 nan 0.1000 -0.0003
Using 100 trees...
Using 100 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.2039 nan 0.1000 0.0116
2 1.1346 nan 0.1000 0.0193
3 1.0917 nan 0.1000 0.0017
4 1.0433 nan 0.1000 0.0043
5 0.9859 nan 0.1000 0.0163
6 0.9455 nan 0.1000 -0.0008
7 0.9085 nan 0.1000 0.0082
8 0.8738 nan 0.1000 0.0087
9 0.8413 nan 0.1000 0.0069
10 0.8130 nan 0.1000 0.0005
20 0.5548 nan 0.1000 0.0079
40 0.3003 nan 0.1000 0.0010
60 0.1643 nan 0.1000 0.0006
80 0.0974 nan 0.1000 -0.0001
100 0.0612 nan 0.1000 -0.0005
Using 100 trees...
Using 100 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.2703 nan 0.1000 0.0100
2 1.2293 nan 0.1000 0.0050
3 1.1891 nan 0.1000 0.0066
4 1.1567 nan 0.1000 -0.0010
5 1.1295 nan 0.1000 0.0033
6 1.0929 nan 0.1000 0.0090
7 1.0553 nan 0.1000 0.0021
8 1.0249 nan 0.1000 0.0022
9 0.9989 nan 0.1000 0.0024
10 0.9723 nan 0.1000 0.0024
20 0.7359 nan 0.1000 0.0043
40 0.4782 nan 0.1000 -0.0024
60 0.3117 nan 0.1000 -0.0010
80 0.2144 nan 0.1000 -0.0018
100 0.1597 nan 0.1000 -0.0001
120 0.1129 nan 0.1000 0.0001
140 0.0801 nan 0.1000 0.0003
160 0.0596 nan 0.1000 -0.0002
180 0.0444 nan 0.1000 -0.0001
200 0.0328 nan 0.1000 -0.0003
220 0.0247 nan 0.1000 -0.0001
240 0.0185 nan 0.1000 -0.0000
260 0.0134 nan 0.1000 -0.0000
280 0.0100 nan 0.1000 -0.0001
300 0.0074 nan 0.1000 -0.0000
320 0.0054 nan 0.1000 -0.0000
340 0.0040 nan 0.1000 -0.0000
360 0.0029 nan 0.1000 -0.0000
380 0.0022 nan 0.1000 -0.0000
400 0.0017 nan 0.1000 -0.0000
420 0.0013 nan 0.1000 -0.0000
440 0.0009 nan 0.1000 -0.0000
460 0.0007 nan 0.1000 -0.0000
480 0.0005 nan 0.1000 -0.0000
500 0.0004 nan 0.1000 -0.0000
Using 100 trees...
Using 100 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.2830 nan 0.1000 0.0056
2 1.2702 nan 0.1000 0.0005
3 1.2588 nan 0.1000 0.0024
4 1.2453 nan 0.1000 0.0025
5 1.2292 nan 0.1000 0.0021
6 1.2118 nan 0.1000 0.0034
7 1.2025 nan 0.1000 -0.0005
8 1.1937 nan 0.1000 -0.0035
9 1.1818 nan 0.1000 -0.0016
10 1.1695 nan 0.1000 0.0044
20 1.0583 nan 0.1000 0.0007
40 0.8929 nan 0.1000 0.0004
60 0.7803 nan 0.1000 -0.0011
80 0.6850 nan 0.1000 -0.0007
100 0.6165 nan 0.1000 -0.0025
120 0.5512 nan 0.1000 -0.0006
140 0.4942 nan 0.1000 -0.0011
160 0.4473 nan 0.1000 -0.0002
180 0.4101 nan 0.1000 -0.0015
200 0.3664 nan 0.1000 -0.0005
220 0.3325 nan 0.1000 -0.0002
240 0.3035 nan 0.1000 -0.0008
260 0.2762 nan 0.1000 -0.0005
280 0.2523 nan 0.1000 -0.0009
300 0.2294 nan 0.1000 -0.0005
320 0.2090 nan 0.1000 -0.0008
340 0.1914 nan 0.1000 -0.0009
360 0.1751 nan 0.1000 -0.0005
380 0.1621 nan 0.1000 -0.0008
400 0.1463 nan 0.1000 -0.0006
420 0.1356 nan 0.1000 -0.0003
440 0.1258 nan 0.1000 -0.0002
460 0.1171 nan 0.1000 -0.0006
480 0.1080 nan 0.1000 -0.0003
500 0.1010 nan 0.1000 -0.0004
Using 100 trees...
Using 100 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.1597 nan 0.1000 0.0104
2 1.1123 nan 0.1000 0.0070
3 1.0679 nan 0.1000 0.0133
4 1.0263 nan 0.1000 0.0081
5 0.9776 nan 0.1000 0.0138
6 0.9395 nan 0.1000 0.0040
7 0.9115 nan 0.1000 0.0058
8 0.8679 nan 0.1000 0.0154
9 0.8378 nan 0.1000 0.0063
10 0.8124 nan 0.1000 0.0031
20 0.6042 nan 0.1000 -0.0041
40 0.3622 nan 0.1000 -0.0017
60 0.2337 nan 0.1000 -0.0015
80 0.1509 nan 0.1000 -0.0006
100 0.1020 nan 0.1000 0.0007
120 0.0731 nan 0.1000 -0.0004
140 0.0503 nan 0.1000 -0.0003
160 0.0347 nan 0.1000 -0.0003
180 0.0247 nan 0.1000 -0.0001
200 0.0186 nan 0.1000 -0.0000
Using 100 trees...
Using 100 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.1923 nan 0.1000 0.0069
2 1.1753 nan 0.1000 0.0026
3 1.1615 nan 0.1000 0.0018
4 1.1516 nan 0.1000 0.0004
5 1.1387 nan 0.1000 -0.0003
6 1.1278 nan 0.1000 -0.0004
7 1.1162 nan 0.1000 0.0014
8 1.1100 nan 0.1000 -0.0045
9 1.1033 nan 0.1000 -0.0027
10 1.0905 nan 0.1000 -0.0000
20 1.0045 nan 0.1000 -0.0024
40 0.8622 nan 0.1000 0.0025
60 0.7632 nan 0.1000 0.0008
80 0.6797 nan 0.1000 -0.0020
100 0.5942 nan 0.1000 -0.0024
120 0.5245 nan 0.1000 -0.0008
140 0.4875 nan 0.1000 -0.0025
160 0.4404 nan 0.1000 -0.0009
180 0.3995 nan 0.1000 -0.0009
200 0.3621 nan 0.1000 0.0001
220 0.3306 nan 0.1000 -0.0008
240 0.3009 nan 0.1000 -0.0001
260 0.2724 nan 0.1000 -0.0002
280 0.2443 nan 0.1000 -0.0009
300 0.2235 nan 0.1000 -0.0010
320 0.2072 nan 0.1000 -0.0015
340 0.1907 nan 0.1000 -0.0005
360 0.1723 nan 0.1000 -0.0003
380 0.1552 nan 0.1000 -0.0009
400 0.1432 nan 0.1000 -0.0004
420 0.1338 nan 0.1000 -0.0003
440 0.1230 nan 0.1000 -0.0003
460 0.1138 nan 0.1000 -0.0011
480 0.1041 nan 0.1000 -0.0004
500 0.0957 nan 0.1000 -0.0001
Using 100 trees...
Using 100 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.1992 nan 0.1000 0.0023
2 1.1616 nan 0.1000 0.0117
3 1.1250 nan 0.1000 0.0066
4 1.0909 nan 0.1000 -0.0023
5 1.0455 nan 0.1000 0.0129
6 1.0061 nan 0.1000 0.0097
7 0.9735 nan 0.1000 0.0064
8 0.9347 nan 0.1000 0.0119
9 0.9011 nan 0.1000 0.0031
10 0.8777 nan 0.1000 -0.0027
20 0.6627 nan 0.1000 0.0020
40 0.4204 nan 0.1000 0.0012
60 0.2772 nan 0.1000 -0.0021
80 0.1891 nan 0.1000 -0.0010
100 0.1283 nan 0.1000 -0.0001
120 0.0914 nan 0.1000 -0.0006
140 0.0656 nan 0.1000 -0.0004
160 0.0483 nan 0.1000 -0.0003
180 0.0350 nan 0.1000 0.0000
200 0.0257 nan 0.1000 -0.0000
Using 100 trees...
Using 100 trees...
# ROC curve
# Build the ROC curve object for post_ltx vs healthy (ASV level) from the
# fitted GBM; roc_curve() is defined in custom_functions.R.
roc_c <- roc_curve(gbm_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
# Archive the performance summary and per-fold ROC objects under model_name.
supplements_models[["models_summ"]][["gbm_model_ra"]][[model_name]] <- gbm_model$model_summary
supplements_models[["roc_cs"]][["gbm_model_ra"]][[model_name]] <- gbm_model$kfold_rocobjs
# see the results
# Transposed one-row summary: tuned hyperparameters plus apparent and
# optimism-corrected AUC/accuracy with CI limits.
gbm_model$model_summary %>% t()
[,1]
n.trees 100.0000000
interaction.depth 5.0000000
shrinkage 0.1000000
n.minobsinnode 30.0000000
auc 0.9786579
auc_optimism_corrected 0.5328104
auc_optimism_corrected_CIL 0.4655437
auc_optimism_corrected_CIU 0.6350140
accuracy 0.8909953
accuracy_optimism_corrected 0.5524883
accuracy_optimism_corrected_CIL 0.5178431
accuracy_optimism_corrected_CIU 0.5937303
# Display the ROC curve for post_ltx vs healthy at ASV level.
roc_c
# Switch the analysis from ASV level to genus level.
level="genus"
Aggregate taxa
# aggregate_taxa() (custom_functions.R) collapses the ileum ASV count table to
# the requested taxonomic level; returns list(aggregated table, matching taxa table).
genus_data <- aggregate_taxa(ileum_asv_tab,
ileum_taxa_tab,
taxonomic_level = level)
ileum_genus_tab <- genus_data[[1]]
ileum_genus_taxa_tab <- genus_data[[2]]
pre_ltx vs healthy
# First genus-level comparison: pre-transplant vs healthy controls.
group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)
# prepare the data
# Same two-group relative-abundance ML preparation as at ASV level, now on the
# genus-aggregated table.
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
ileum_genus_taxa_tab,
ileum_metadata,
group,
usage="ml_ra")
Removing 84 ASV(s)
Removing 10 ASV(s)
# fit the model
# GBM with optimism-corrected performance (atypboot resampling, N=10),
# fresh fit (reuse=FALSE), saved under model_name for Q1.
gbm_model <- gbm_binomial(filt_ileum_uni_data,
sample_method = "atypboot",
outcome="Group",
N=10,
reuse=FALSE,
file=model_name,
Q="Q1",
overfitting_check = TRUE)
Warning: There were missing values in resampled performance measures.Warning: missing values found in aggregated results
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.3723 nan 0.1000 -0.0025
2 1.3697 nan 0.1000 -0.0097
3 1.3597 nan 0.1000 -0.0011
4 1.3554 nan 0.1000 -0.0061
5 1.3413 nan 0.1000 0.0022
6 1.3285 nan 0.1000 -0.0045
7 1.3210 nan 0.1000 -0.0041
8 1.3174 nan 0.1000 -0.0053
9 1.3090 nan 0.1000 -0.0088
10 1.3003 nan 0.1000 -0.0020
20 1.2197 nan 0.1000 -0.0011
40 1.0782 nan 0.1000 -0.0108
60 0.9770 nan 0.1000 -0.0016
80 0.8832 nan 0.1000 -0.0011
100 0.8230 nan 0.1000 -0.0039
Using 100 trees...
Warning: There were missing values in resampled performance measures.Warning: missing values found in aggregated results
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.3030 nan 0.1000 0.0184
2 1.2549 nan 0.1000 -0.0031
3 1.1898 nan 0.1000 0.0207
4 1.1287 nan 0.1000 0.0166
5 1.0695 nan 0.1000 0.0191
6 1.0182 nan 0.1000 0.0102
7 0.9773 nan 0.1000 0.0067
8 0.9264 nan 0.1000 0.0165
9 0.8969 nan 0.1000 0.0011
10 0.8689 nan 0.1000 -0.0033
20 0.5840 nan 0.1000 0.0045
40 0.2815 nan 0.1000 0.0014
60 0.1506 nan 0.1000 0.0002
80 0.0831 nan 0.1000 -0.0005
100 0.0444 nan 0.1000 -0.0002
Using 100 trees...
Using 100 trees...
Warning: There were missing values in resampled performance measures.Warning: missing values found in aggregated results
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.3200 nan 0.1000 0.0209
2 1.2848 nan 0.1000 -0.0060
3 1.2322 nan 0.1000 0.0108
4 1.1762 nan 0.1000 0.0105
5 1.1148 nan 0.1000 0.0183
6 1.0673 nan 0.1000 0.0033
7 1.0241 nan 0.1000 -0.0001
8 0.9933 nan 0.1000 0.0011
9 0.9503 nan 0.1000 0.0155
10 0.9102 nan 0.1000 0.0092
20 0.6668 nan 0.1000 0.0003
40 0.3815 nan 0.1000 -0.0012
60 0.2371 nan 0.1000 -0.0012
80 0.1555 nan 0.1000 -0.0015
100 0.1042 nan 0.1000 -0.0014
Using 100 trees...
Using 100 trees...
Warning: There were missing values in resampled performance measures.Warning: missing values found in aggregated results
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.3084 nan 0.1000 0.0233
2 1.2422 nan 0.1000 0.0198
3 1.1906 nan 0.1000 0.0125
4 1.1572 nan 0.1000 0.0075
5 1.1151 nan 0.1000 0.0050
6 1.0718 nan 0.1000 0.0118
7 1.0317 nan 0.1000 0.0106
8 0.9971 nan 0.1000 0.0084
9 0.9714 nan 0.1000 -0.0036
10 0.9332 nan 0.1000 0.0089
20 0.6665 nan 0.1000 0.0031
40 0.3987 nan 0.1000 0.0011
60 0.2417 nan 0.1000 -0.0013
80 0.1479 nan 0.1000 -0.0012
100 0.0960 nan 0.1000 0.0004
120 0.0653 nan 0.1000 -0.0010
140 0.0456 nan 0.1000 -0.0003
160 0.0322 nan 0.1000 -0.0003
180 0.0224 nan 0.1000 0.0001
200 0.0147 nan 0.1000 -0.0001
Using 100 trees...
Using 100 trees...
Warning: There were missing values in resampled performance measures.Warning: missing values found in aggregated results
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.3502 nan 0.1000 0.0004
2 1.3174 nan 0.1000 0.0093
3 1.2743 nan 0.1000 0.0189
4 1.2592 nan 0.1000 -0.0057
5 1.2373 nan 0.1000 0.0060
6 1.1979 nan 0.1000 0.0074
7 1.1610 nan 0.1000 0.0108
8 1.1378 nan 0.1000 -0.0020
9 1.1217 nan 0.1000 -0.0050
10 1.0951 nan 0.1000 0.0019
20 0.9287 nan 0.1000 -0.0055
40 0.7124 nan 0.1000 -0.0009
60 0.5434 nan 0.1000 -0.0014
80 0.4347 nan 0.1000 0.0004
100 0.3480 nan 0.1000 -0.0027
120 0.2807 nan 0.1000 -0.0011
140 0.2305 nan 0.1000 -0.0016
160 0.1902 nan 0.1000 -0.0014
180 0.1529 nan 0.1000 -0.0005
200 0.1218 nan 0.1000 -0.0006
Using 100 trees...
Using 100 trees...
Warning: There were missing values in resampled performance measures.Warning: missing values found in aggregated results
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.3700 nan 0.1000 0.0006
2 1.3497 nan 0.1000 0.0041
3 1.3410 nan 0.1000 -0.0073
4 1.3218 nan 0.1000 0.0021
5 1.3041 nan 0.1000 -0.0000
6 1.2859 nan 0.1000 -0.0013
7 1.2619 nan 0.1000 0.0042
8 1.2395 nan 0.1000 0.0028
9 1.2243 nan 0.1000 -0.0024
10 1.2064 nan 0.1000 0.0060
20 1.0918 nan 0.1000 -0.0026
40 0.8993 nan 0.1000 -0.0054
60 0.7590 nan 0.1000 -0.0023
80 0.6496 nan 0.1000 -0.0028
100 0.5542 nan 0.1000 -0.0032
120 0.4768 nan 0.1000 -0.0025
140 0.4154 nan 0.1000 -0.0019
160 0.3671 nan 0.1000 -0.0013
180 0.3235 nan 0.1000 -0.0028
200 0.2847 nan 0.1000 -0.0019
220 0.2515 nan 0.1000 0.0001
240 0.2217 nan 0.1000 -0.0021
260 0.1960 nan 0.1000 -0.0000
280 0.1774 nan 0.1000 -0.0001
300 0.1595 nan 0.1000 -0.0000
320 0.1428 nan 0.1000 -0.0007
340 0.1282 nan 0.1000 0.0003
360 0.1145 nan 0.1000 -0.0007
380 0.1034 nan 0.1000 -0.0005
400 0.0940 nan 0.1000 -0.0010
420 0.0816 nan 0.1000 -0.0003
440 0.0741 nan 0.1000 -0.0007
460 0.0662 nan 0.1000 -0.0002
480 0.0604 nan 0.1000 -0.0004
500 0.0541 nan 0.1000 -0.0004
Using 100 trees...
Using 100 trees...
Warning: There were missing values in resampled performance measures.Warning: missing values found in aggregated results
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.3662 nan 0.1000 -0.0028
2 1.3543 nan 0.1000 -0.0023
3 1.3443 nan 0.1000 -0.0086
4 1.3288 nan 0.1000 0.0023
5 1.3093 nan 0.1000 0.0069
6 1.2941 nan 0.1000 -0.0006
7 1.2757 nan 0.1000 0.0010
8 1.2596 nan 0.1000 -0.0030
9 1.2455 nan 0.1000 -0.0008
10 1.2324 nan 0.1000 -0.0041
20 1.1145 nan 0.1000 0.0029
40 0.9375 nan 0.1000 -0.0004
60 0.8036 nan 0.1000 -0.0004
80 0.6932 nan 0.1000 -0.0018
100 0.6011 nan 0.1000 -0.0020
120 0.5285 nan 0.1000 -0.0002
140 0.4754 nan 0.1000 -0.0009
160 0.4267 nan 0.1000 -0.0017
180 0.3833 nan 0.1000 -0.0002
200 0.3419 nan 0.1000 -0.0018
220 0.3089 nan 0.1000 -0.0016
240 0.2813 nan 0.1000 -0.0019
260 0.2554 nan 0.1000 -0.0006
280 0.2308 nan 0.1000 -0.0012
300 0.2091 nan 0.1000 -0.0017
320 0.1924 nan 0.1000 -0.0009
340 0.1798 nan 0.1000 -0.0006
360 0.1637 nan 0.1000 -0.0010
380 0.1496 nan 0.1000 -0.0008
400 0.1373 nan 0.1000 -0.0003
420 0.1278 nan 0.1000 -0.0006
440 0.1200 nan 0.1000 -0.0009
460 0.1132 nan 0.1000 -0.0004
480 0.1036 nan 0.1000 -0.0003
500 0.0961 nan 0.1000 -0.0007
Using 100 trees...
Using 100 trees...
Warning: There were missing values in resampled performance measures.Warning: missing values found in aggregated results
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.3512 nan 0.1000 0.0033
2 1.3123 nan 0.1000 0.0105
3 1.2865 nan 0.1000 0.0005
4 1.2638 nan 0.1000 0.0013
5 1.2162 nan 0.1000 0.0125
6 1.1935 nan 0.1000 -0.0011
7 1.1663 nan 0.1000 0.0035
8 1.1579 nan 0.1000 -0.0000
9 1.1327 nan 0.1000 0.0011
10 1.1097 nan 0.1000 0.0002
20 0.9587 nan 0.1000 -0.0024
40 0.7486 nan 0.1000 -0.0008
60 0.6065 nan 0.1000 0.0008
80 0.4869 nan 0.1000 -0.0009
100 0.3902 nan 0.1000 0.0000
120 0.3235 nan 0.1000 -0.0016
140 0.2620 nan 0.1000 -0.0018
160 0.2217 nan 0.1000 -0.0002
180 0.1823 nan 0.1000 -0.0005
200 0.1551 nan 0.1000 -0.0014
Using 100 trees...
Using 100 trees...
Warning: There were missing values in resampled performance measures.Warning: missing values found in aggregated results
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.3352 nan 0.1000 0.0060
2 1.2901 nan 0.1000 0.0055
3 1.2608 nan 0.1000 0.0063
4 1.2036 nan 0.1000 0.0226
5 1.1675 nan 0.1000 0.0077
6 1.1468 nan 0.1000 0.0018
7 1.1248 nan 0.1000 -0.0071
8 1.1114 nan 0.1000 0.0010
9 1.1001 nan 0.1000 0.0002
10 1.0793 nan 0.1000 -0.0024
20 0.8868 nan 0.1000 -0.0023
40 0.5894 nan 0.1000 0.0004
60 0.4295 nan 0.1000 0.0010
80 0.3394 nan 0.1000 -0.0025
100 0.2535 nan 0.1000 -0.0013
120 0.1967 nan 0.1000 -0.0032
140 0.1562 nan 0.1000 -0.0007
160 0.1267 nan 0.1000 -0.0006
180 0.0981 nan 0.1000 -0.0000
200 0.0772 nan 0.1000 -0.0002
220 0.0611 nan 0.1000 -0.0000
240 0.0486 nan 0.1000 -0.0001
260 0.0387 nan 0.1000 -0.0000
280 0.0308 nan 0.1000 -0.0002
300 0.0240 nan 0.1000 -0.0001
320 0.0195 nan 0.1000 0.0000
340 0.0156 nan 0.1000 -0.0001
360 0.0125 nan 0.1000 -0.0001
380 0.0102 nan 0.1000 -0.0000
400 0.0086 nan 0.1000 0.0000
420 0.0068 nan 0.1000 -0.0000
440 0.0055 nan 0.1000 -0.0000
460 0.0045 nan 0.1000 -0.0001
480 0.0036 nan 0.1000 -0.0000
500 0.0028 nan 0.1000 -0.0000
Using 100 trees...
Using 100 trees...
Warning: There were missing values in resampled performance measures.Warning: missing values found in aggregated results
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.3461 nan 0.1000 0.0088
2 1.3258 nan 0.1000 0.0019
3 1.3060 nan 0.1000 0.0036
4 1.2801 nan 0.1000 0.0081
5 1.2555 nan 0.1000 -0.0021
6 1.2390 nan 0.1000 0.0029
7 1.2255 nan 0.1000 0.0013
8 1.2113 nan 0.1000 0.0015
9 1.1966 nan 0.1000 0.0004
10 1.1793 nan 0.1000 0.0009
20 1.0371 nan 0.1000 -0.0028
40 0.8292 nan 0.1000 0.0028
60 0.7022 nan 0.1000 -0.0032
80 0.5876 nan 0.1000 -0.0031
100 0.4980 nan 0.1000 0.0011
120 0.4287 nan 0.1000 -0.0012
140 0.3704 nan 0.1000 -0.0007
160 0.3252 nan 0.1000 -0.0007
180 0.2882 nan 0.1000 -0.0015
200 0.2520 nan 0.1000 -0.0015
Using 100 trees...
Using 100 trees...
Warning: There were missing values in resampled performance measures.Warning: missing values found in aggregated results
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.3577 nan 0.1000 0.0025
2 1.3280 nan 0.1000 0.0076
3 1.2864 nan 0.1000 0.0101
4 1.2323 nan 0.1000 0.0172
5 1.1997 nan 0.1000 0.0072
6 1.1760 nan 0.1000 0.0040
7 1.1386 nan 0.1000 0.0099
8 1.1116 nan 0.1000 0.0043
9 1.0986 nan 0.1000 -0.0024
10 1.0901 nan 0.1000 -0.0039
20 0.8928 nan 0.1000 0.0000
40 0.6549 nan 0.1000 -0.0025
60 0.4846 nan 0.1000 0.0029
80 0.3684 nan 0.1000 -0.0028
100 0.2835 nan 0.1000 -0.0003
120 0.2292 nan 0.1000 -0.0018
140 0.1843 nan 0.1000 -0.0003
160 0.1431 nan 0.1000 -0.0011
180 0.1172 nan 0.1000 -0.0004
200 0.0935 nan 0.1000 -0.0006
Using 100 trees...
Using 100 trees...
# ROC curve
# Build the ROC curve object for pre_ltx vs healthy (genus level);
# roc_curve() is defined in custom_functions.R.
roc_c <- roc_curve(gbm_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
# Archive the performance summary and per-fold ROC objects under model_name.
supplements_models[["models_summ"]][["gbm_model_ra"]][[model_name]] <- gbm_model$model_summary
supplements_models[["roc_cs"]][["gbm_model_ra"]][[model_name]] <- gbm_model$kfold_rocobjs
# see the results
# Transposed one-row summary: tuned hyperparameters plus apparent and
# optimism-corrected AUC/accuracy with CI limits.
gbm_model$model_summary %>% t()
[,1]
n.trees 100.0000000
interaction.depth 1.0000000
shrinkage 0.1000000
n.minobsinnode 20.0000000
auc 0.9596869
auc_optimism_corrected 0.5087091
auc_optimism_corrected_CIL 0.3924655
auc_optimism_corrected_CIU 0.6057091
accuracy 0.9020979
accuracy_optimism_corrected 0.5142506
accuracy_optimism_corrected_CIL 0.4721088
accuracy_optimism_corrected_CIU 0.5722273
# Display the ROC curve for pre_ltx vs healthy at genus level.
roc_c
pre_ltx vs post_ltx
# Compare pre- vs post-transplant samples at genus level; model_name is the
# save/cache key for this comparison.
group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)
# prepare the data
# Two-group relative-abundance ML preparation on the genus-aggregated table;
# the "Removing ... ASV(s)" lines below are binomial_prep() filter messages.
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
ileum_genus_taxa_tab,
ileum_metadata,
group,
usage="ml_ra")
Removing 46 ASV(s)
Removing 6 ASV(s)
# fit the model
# GBM with optimism-corrected performance (atypboot resampling, N=10),
# fresh fit (reuse=FALSE), saved under model_name for Q1.
gbm_model <- gbm_binomial(filt_ileum_uni_data,
sample_method = "atypboot",
outcome="Group",
N=10,
reuse=FALSE,
file=model_name,
Q="Q1",
overfitting_check = TRUE)
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.2582 nan 0.1000 0.0046
2 1.2401 nan 0.1000 0.0016
3 1.2234 nan 0.1000 -0.0049
4 1.2095 nan 0.1000 -0.0023
5 1.1941 nan 0.1000 -0.0031
6 1.1745 nan 0.1000 -0.0030
7 1.1607 nan 0.1000 -0.0085
8 1.1413 nan 0.1000 -0.0068
9 1.1302 nan 0.1000 -0.0072
10 1.1073 nan 0.1000 0.0040
20 0.9621 nan 0.1000 0.0031
40 0.7701 nan 0.1000 -0.0044
60 0.6131 nan 0.1000 -0.0035
80 0.4966 nan 0.1000 -0.0018
100 0.4189 nan 0.1000 -0.0027
Using 100 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.2383 nan 0.1000 0.0130
2 1.1761 nan 0.1000 0.0140
3 1.1378 nan 0.1000 0.0100
4 1.0811 nan 0.1000 0.0189
5 1.0434 nan 0.1000 0.0069
6 1.0105 nan 0.1000 -0.0014
7 0.9626 nan 0.1000 0.0118
8 0.9379 nan 0.1000 -0.0031
9 0.8974 nan 0.1000 0.0079
10 0.8488 nan 0.1000 0.0153
20 0.6199 nan 0.1000 -0.0022
40 0.3440 nan 0.1000 -0.0004
60 0.1950 nan 0.1000 -0.0026
80 0.1196 nan 0.1000 -0.0006
100 0.0738 nan 0.1000 -0.0002
120 0.0488 nan 0.1000 -0.0002
140 0.0314 nan 0.1000 0.0001
160 0.0194 nan 0.1000 -0.0000
180 0.0127 nan 0.1000 -0.0001
200 0.0083 nan 0.1000 -0.0001
220 0.0057 nan 0.1000 -0.0000
240 0.0040 nan 0.1000 -0.0000
260 0.0028 nan 0.1000 0.0000
280 0.0020 nan 0.1000 -0.0000
300 0.0016 nan 0.1000 0.0000
320 0.0012 nan 0.1000 -0.0000
340 0.0007 nan 0.1000 0.0000
360 0.0005 nan 0.1000 0.0000
380 0.0003 nan 0.1000 0.0000
400 0.0002 nan 0.1000 -0.0000
420 0.0002 nan 0.1000 -0.0000
440 0.0001 nan 0.1000 -0.0000
460 0.0001 nan 0.1000 -0.0000
480 0.0001 nan 0.1000 -0.0000
500 0.0001 nan 0.1000 -0.0000
Using 100 trees...
Using 100 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.1365 nan 0.1000 0.0162
2 1.0989 nan 0.1000 0.0057
3 1.0449 nan 0.1000 0.0135
4 1.0005 nan 0.1000 0.0130
5 0.9628 nan 0.1000 0.0053
6 0.9249 nan 0.1000 0.0065
7 0.8946 nan 0.1000 0.0058
8 0.8698 nan 0.1000 0.0042
9 0.8433 nan 0.1000 0.0016
10 0.8208 nan 0.1000 0.0035
20 0.6292 nan 0.1000 0.0026
40 0.3907 nan 0.1000 0.0000
60 0.2584 nan 0.1000 0.0008
80 0.1780 nan 0.1000 0.0002
100 0.1277 nan 0.1000 -0.0008
Using 100 trees...
Using 100 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.2241 nan 0.1000 0.0197
2 1.1588 nan 0.1000 0.0151
3 1.0968 nan 0.1000 0.0178
4 1.0464 nan 0.1000 0.0098
5 0.9918 nan 0.1000 0.0176
6 0.9433 nan 0.1000 0.0162
7 0.8996 nan 0.1000 0.0132
8 0.8554 nan 0.1000 0.0124
9 0.8201 nan 0.1000 0.0081
10 0.7855 nan 0.1000 0.0056
20 0.5473 nan 0.1000 -0.0027
40 0.2783 nan 0.1000 -0.0004
60 0.1619 nan 0.1000 0.0006
80 0.0938 nan 0.1000 -0.0004
100 0.0577 nan 0.1000 -0.0002
Using 100 trees...
Using 100 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.2451 nan 0.1000 0.0187
2 1.1917 nan 0.1000 0.0118
3 1.1262 nan 0.1000 0.0246
4 1.0727 nan 0.1000 0.0195
5 1.0168 nan 0.1000 0.0136
6 0.9683 nan 0.1000 0.0104
7 0.9375 nan 0.1000 -0.0018
8 0.8973 nan 0.1000 0.0130
9 0.8627 nan 0.1000 0.0073
10 0.8333 nan 0.1000 0.0073
20 0.5849 nan 0.1000 -0.0016
40 0.3013 nan 0.1000 0.0004
60 0.1794 nan 0.1000 -0.0003
80 0.1062 nan 0.1000 -0.0011
100 0.0665 nan 0.1000 -0.0001
Using 100 trees...
Using 100 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.1530 nan 0.1000 0.0188
2 1.0868 nan 0.1000 0.0172
3 1.0388 nan 0.1000 0.0181
4 0.9878 nan 0.1000 0.0142
5 0.9598 nan 0.1000 0.0045
6 0.9125 nan 0.1000 0.0052
7 0.8746 nan 0.1000 -0.0009
8 0.8264 nan 0.1000 0.0145
9 0.7928 nan 0.1000 0.0065
10 0.7645 nan 0.1000 0.0030
20 0.5141 nan 0.1000 -0.0027
40 0.2733 nan 0.1000 0.0012
60 0.1503 nan 0.1000 -0.0006
80 0.0946 nan 0.1000 -0.0008
100 0.0564 nan 0.1000 -0.0004
Using 100 trees...
Using 100 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.2870 nan 0.1000 0.0017
2 1.2665 nan 0.1000 0.0055
3 1.2507 nan 0.1000 0.0042
4 1.2298 nan 0.1000 0.0036
5 1.2206 nan 0.1000 -0.0006
6 1.2063 nan 0.1000 0.0005
7 1.1856 nan 0.1000 0.0047
8 1.1696 nan 0.1000 0.0032
9 1.1587 nan 0.1000 -0.0007
10 1.1416 nan 0.1000 0.0039
20 0.9969 nan 0.1000 0.0029
40 0.8131 nan 0.1000 -0.0014
60 0.6621 nan 0.1000 -0.0036
80 0.5621 nan 0.1000 -0.0001
100 0.4853 nan 0.1000 -0.0007
120 0.4116 nan 0.1000 -0.0005
140 0.3597 nan 0.1000 -0.0013
160 0.3106 nan 0.1000 -0.0002
180 0.2672 nan 0.1000 -0.0006
200 0.2327 nan 0.1000 -0.0014
220 0.1989 nan 0.1000 -0.0000
240 0.1741 nan 0.1000 -0.0022
260 0.1494 nan 0.1000 -0.0015
280 0.1314 nan 0.1000 -0.0003
300 0.1100 nan 0.1000 -0.0003
320 0.0956 nan 0.1000 -0.0004
340 0.0844 nan 0.1000 -0.0006
360 0.0742 nan 0.1000 -0.0005
380 0.0660 nan 0.1000 -0.0001
400 0.0567 nan 0.1000 -0.0005
420 0.0499 nan 0.1000 -0.0001
440 0.0446 nan 0.1000 -0.0002
460 0.0390 nan 0.1000 -0.0002
480 0.0346 nan 0.1000 -0.0002
500 0.0310 nan 0.1000 -0.0003
Using 100 trees...
Using 100 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.2766 nan 0.1000 0.0047
2 1.2378 nan 0.1000 0.0157
3 1.2000 nan 0.1000 0.0057
4 1.1789 nan 0.1000 -0.0002
5 1.1469 nan 0.1000 0.0055
6 1.1188 nan 0.1000 0.0030
7 1.0919 nan 0.1000 0.0053
8 1.0484 nan 0.1000 0.0164
9 1.0316 nan 0.1000 0.0008
10 1.0162 nan 0.1000 0.0001
20 0.8296 nan 0.1000 -0.0068
40 0.5847 nan 0.1000 0.0003
60 0.4331 nan 0.1000 -0.0014
80 0.3347 nan 0.1000 -0.0011
100 0.2660 nan 0.1000 -0.0016
120 0.2030 nan 0.1000 -0.0010
140 0.1619 nan 0.1000 -0.0008
160 0.1272 nan 0.1000 -0.0005
180 0.0980 nan 0.1000 -0.0001
200 0.0770 nan 0.1000 -0.0003
Using 100 trees...
Using 100 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.2197 nan 0.1000 0.0312
2 1.1551 nan 0.1000 0.0212
3 1.0946 nan 0.1000 0.0193
4 1.0287 nan 0.1000 0.0128
5 0.9855 nan 0.1000 0.0048
6 0.9425 nan 0.1000 0.0066
7 0.8978 nan 0.1000 0.0093
8 0.8636 nan 0.1000 0.0076
9 0.8279 nan 0.1000 0.0064
10 0.7959 nan 0.1000 0.0045
20 0.5491 nan 0.1000 0.0012
40 0.3088 nan 0.1000 0.0013
60 0.1763 nan 0.1000 -0.0016
80 0.1017 nan 0.1000 -0.0001
100 0.0635 nan 0.1000 0.0002
Using 100 trees...
Using 100 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.1430 nan 0.1000 0.0068
2 1.1010 nan 0.1000 0.0088
3 1.0700 nan 0.1000 0.0082
4 1.0447 nan 0.1000 0.0021
5 1.0176 nan 0.1000 0.0037
6 0.9914 nan 0.1000 0.0009
7 0.9713 nan 0.1000 -0.0006
8 0.9666 nan 0.1000 -0.0096
9 0.9399 nan 0.1000 0.0028
10 0.9181 nan 0.1000 0.0015
20 0.7452 nan 0.1000 0.0018
40 0.5257 nan 0.1000 -0.0001
60 0.3726 nan 0.1000 -0.0004
80 0.2820 nan 0.1000 0.0007
100 0.2123 nan 0.1000 -0.0010
120 0.1567 nan 0.1000 -0.0008
140 0.1152 nan 0.1000 -0.0009
160 0.0900 nan 0.1000 0.0001
180 0.0701 nan 0.1000 -0.0000
200 0.0547 nan 0.1000 -0.0005
Using 100 trees...
Using 100 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.2159 nan 0.1000 0.0254
2 1.1575 nan 0.1000 0.0199
3 1.0795 nan 0.1000 0.0292
4 1.0286 nan 0.1000 0.0116
5 0.9819 nan 0.1000 0.0101
6 0.9291 nan 0.1000 0.0133
7 0.8804 nan 0.1000 0.0143
8 0.8395 nan 0.1000 0.0121
9 0.8102 nan 0.1000 0.0049
10 0.7763 nan 0.1000 0.0090
20 0.5472 nan 0.1000 0.0008
40 0.2957 nan 0.1000 -0.0001
60 0.1699 nan 0.1000 -0.0016
80 0.1014 nan 0.1000 0.0004
100 0.0595 nan 0.1000 0.0003
120 0.0392 nan 0.1000 -0.0000
140 0.0238 nan 0.1000 -0.0002
160 0.0158 nan 0.1000 -0.0001
180 0.0097 nan 0.1000 -0.0000
200 0.0058 nan 0.1000 0.0000
Using 100 trees...
Using 100 trees...
# ROC curve
# Build the ROC curve object for pre_ltx vs post_ltx (genus level);
# roc_curve() is defined in custom_functions.R.
roc_c <- roc_curve(gbm_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
# Archive the performance summary and per-fold ROC objects under model_name.
supplements_models[["models_summ"]][["gbm_model_ra"]][[model_name]] <- gbm_model$model_summary
supplements_models[["roc_cs"]][["gbm_model_ra"]][[model_name]] <- gbm_model$kfold_rocobjs
# see the results
# Transposed one-row summary: tuned hyperparameters plus apparent and
# optimism-corrected AUC/accuracy with CI limits.
gbm_model$model_summary %>% t()
[,1]
n.trees 100.0000000
interaction.depth 5.0000000
shrinkage 0.1000000
n.minobsinnode 20.0000000
auc 1.0000000
auc_optimism_corrected 0.5036906
auc_optimism_corrected_CIL 0.4093266
auc_optimism_corrected_CIU 0.5839071
accuracy 0.9951923
accuracy_optimism_corrected 0.5946566
accuracy_optimism_corrected_CIL 0.5342840
accuracy_optimism_corrected_CIU 0.6931171
# Display the ROC curve for pre_ltx vs post_ltx at genus level.
roc_c
post_ltx vs healthy
# Final genus-level comparison: post-transplant vs healthy controls.
group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)
# prepare the data
# Two-group relative-abundance ML preparation on the genus-aggregated table;
# the "Removing ... ASV(s)" lines below are binomial_prep() filter messages.
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
ileum_genus_taxa_tab,
ileum_metadata,
group,
usage="ml_ra")
Removing 45 ASV(s)
Removing 2 ASV(s)
# fit the model
# GBM with optimism-corrected performance (atypboot resampling, N=10),
# fresh fit (reuse=FALSE), saved under model_name for Q1.
gbm_model <- gbm_binomial(filt_ileum_uni_data,
sample_method = "atypboot",
outcome="Group",
N=10,
reuse=FALSE,
file=model_name,
Q="Q1",
overfitting_check = TRUE)
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.2422 nan 0.1000 -0.0026
2 1.2156 nan 0.1000 -0.0134
3 1.1797 nan 0.1000 0.0037
4 1.1210 nan 0.1000 0.0011
5 1.0908 nan 0.1000 -0.0091
6 1.0687 nan 0.1000 -0.0014
7 1.0276 nan 0.1000 0.0063
8 0.9968 nan 0.1000 0.0033
9 0.9649 nan 0.1000 -0.0018
10 0.9486 nan 0.1000 -0.0132
20 0.7349 nan 0.1000 -0.0054
40 0.4732 nan 0.1000 -0.0014
60 0.3156 nan 0.1000 -0.0008
80 0.2148 nan 0.1000 -0.0025
100 0.1511 nan 0.1000 -0.0025
120 0.1042 nan 0.1000 -0.0005
140 0.0747 nan 0.1000 -0.0003
160 0.0539 nan 0.1000 -0.0004
180 0.0396 nan 0.1000 -0.0004
200 0.0290 nan 0.1000 -0.0002
Using 200 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.2720 nan 0.1000 0.0069
2 1.2143 nan 0.1000 0.0156
3 1.1670 nan 0.1000 0.0058
4 1.1102 nan 0.1000 0.0157
5 1.0487 nan 0.1000 0.0189
6 1.0161 nan 0.1000 -0.0028
7 0.9675 nan 0.1000 0.0058
8 0.9215 nan 0.1000 0.0088
9 0.8796 nan 0.1000 0.0096
10 0.8438 nan 0.1000 0.0043
20 0.5948 nan 0.1000 0.0027
40 0.3258 nan 0.1000 -0.0002
60 0.1776 nan 0.1000 -0.0002
80 0.0997 nan 0.1000 -0.0000
100 0.0599 nan 0.1000 -0.0008
120 0.0365 nan 0.1000 -0.0002
140 0.0230 nan 0.1000 -0.0001
160 0.0139 nan 0.1000 -0.0001
180 0.0090 nan 0.1000 -0.0001
200 0.0054 nan 0.1000 -0.0000
220 0.0034 nan 0.1000 -0.0000
240 0.0022 nan 0.1000 -0.0000
260 0.0014 nan 0.1000 -0.0000
280 0.0009 nan 0.1000 -0.0000
300 0.0005 nan 0.1000 -0.0000
320 0.0003 nan 0.1000 -0.0000
340 0.0002 nan 0.1000 -0.0000
360 0.0001 nan 0.1000 0.0000
380 0.0001 nan 0.1000 -0.0000
400 0.0001 nan 0.1000 -0.0000
420 0.0000 nan 0.1000 -0.0000
440 0.0000 nan 0.1000 -0.0000
460 0.0000 nan 0.1000 -0.0000
480 0.0000 nan 0.1000 -0.0000
500 0.0000 nan 0.1000 -0.0000
Using 200 trees...
Using 200 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.2199 nan 0.1000 0.0043
2 1.1786 nan 0.1000 0.0177
3 1.1593 nan 0.1000 -0.0000
4 1.1297 nan 0.1000 0.0067
5 1.0911 nan 0.1000 0.0113
6 1.0698 nan 0.1000 0.0004
7 1.0457 nan 0.1000 -0.0046
8 1.0220 nan 0.1000 0.0002
9 0.9947 nan 0.1000 0.0059
10 0.9812 nan 0.1000 -0.0060
20 0.8164 nan 0.1000 0.0052
40 0.5575 nan 0.1000 -0.0011
60 0.4109 nan 0.1000 -0.0016
80 0.3032 nan 0.1000 -0.0003
100 0.2222 nan 0.1000 -0.0006
120 0.1641 nan 0.1000 -0.0011
140 0.1213 nan 0.1000 -0.0004
160 0.0919 nan 0.1000 -0.0001
180 0.0697 nan 0.1000 -0.0000
200 0.0537 nan 0.1000 0.0003
Using 200 trees...
Using 200 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.2680 nan 0.1000 0.0196
2 1.2109 nan 0.1000 0.0121
3 1.1546 nan 0.1000 0.0091
4 1.1014 nan 0.1000 0.0139
5 1.0386 nan 0.1000 0.0173
6 0.9928 nan 0.1000 0.0090
7 0.9595 nan 0.1000 0.0034
8 0.9315 nan 0.1000 0.0025
9 0.8943 nan 0.1000 0.0062
10 0.8768 nan 0.1000 -0.0056
20 0.6342 nan 0.1000 0.0027
40 0.3451 nan 0.1000 -0.0007
60 0.2004 nan 0.1000 -0.0004
80 0.1191 nan 0.1000 -0.0007
100 0.0714 nan 0.1000 -0.0006
120 0.0450 nan 0.1000 -0.0003
140 0.0279 nan 0.1000 -0.0002
160 0.0168 nan 0.1000 -0.0000
180 0.0107 nan 0.1000 -0.0000
200 0.0070 nan 0.1000 -0.0000
220 0.0044 nan 0.1000 -0.0000
240 0.0028 nan 0.1000 0.0000
260 0.0018 nan 0.1000 -0.0000
280 0.0012 nan 0.1000 -0.0000
300 0.0007 nan 0.1000 -0.0000
320 0.0005 nan 0.1000 0.0000
340 0.0003 nan 0.1000 0.0000
360 0.0002 nan 0.1000 -0.0000
380 0.0001 nan 0.1000 -0.0000
400 0.0001 nan 0.1000 0.0000
420 0.0000 nan 0.1000 -0.0000
440 0.0000 nan 0.1000 -0.0000
460 0.0000 nan 0.1000 0.0000
480 0.0000 nan 0.1000 -0.0000
500 0.0000 nan 0.1000 -0.0000
Using 200 trees...
Using 200 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.2657 nan 0.1000 0.0004
2 1.2363 nan 0.1000 0.0023
3 1.2069 nan 0.1000 0.0043
4 1.1743 nan 0.1000 0.0054
5 1.1543 nan 0.1000 0.0011
6 1.1274 nan 0.1000 -0.0031
7 1.0989 nan 0.1000 0.0046
8 1.0722 nan 0.1000 0.0048
9 1.0454 nan 0.1000 0.0071
10 1.0202 nan 0.1000 0.0024
20 0.8129 nan 0.1000 -0.0039
40 0.5768 nan 0.1000 -0.0013
60 0.4243 nan 0.1000 -0.0021
80 0.3151 nan 0.1000 0.0003
100 0.2445 nan 0.1000 -0.0009
120 0.1909 nan 0.1000 0.0006
140 0.1508 nan 0.1000 0.0003
160 0.1174 nan 0.1000 -0.0007
180 0.0924 nan 0.1000 -0.0006
200 0.0718 nan 0.1000 -0.0002
Using 200 trees...
Using 200 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.1124 nan 0.1000 0.0269
2 1.0492 nan 0.1000 0.0180
3 0.9939 nan 0.1000 0.0148
4 0.9538 nan 0.1000 0.0023
5 0.9180 nan 0.1000 0.0093
6 0.8673 nan 0.1000 0.0136
7 0.8154 nan 0.1000 0.0202
8 0.7783 nan 0.1000 0.0043
9 0.7476 nan 0.1000 0.0080
10 0.7130 nan 0.1000 0.0097
20 0.4759 nan 0.1000 -0.0005
40 0.2483 nan 0.1000 0.0002
60 0.1330 nan 0.1000 0.0002
80 0.0811 nan 0.1000 -0.0006
100 0.0481 nan 0.1000 -0.0002
120 0.0302 nan 0.1000 -0.0002
140 0.0193 nan 0.1000 -0.0004
160 0.0129 nan 0.1000 -0.0001
180 0.0079 nan 0.1000 -0.0001
200 0.0049 nan 0.1000 0.0000
220 0.0031 nan 0.1000 -0.0000
240 0.0025 nan 0.1000 -0.0000
260 0.0016 nan 0.1000 -0.0000
280 0.0016 nan 0.1000 -0.0001
300 0.0010 nan 0.1000 -0.0000
320 0.0006 nan 0.1000 0.0000
340 0.0004 nan 0.1000 -0.0000
360 0.0003 nan 0.1000 0.0000
380 0.0002 nan 0.1000 0.0000
400 0.0002 nan 0.1000 -0.0000
420 0.0003 nan 0.1000 -0.0000
440 0.0001 nan 0.1000 -0.0000
460 0.0001 nan 0.1000 -0.0000
480 0.0000 nan 0.1000 -0.0000
500 0.0000 nan 0.1000 -0.0000
Using 200 trees...
Using 200 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.2716 nan 0.1000 0.0200
2 1.2193 nan 0.1000 0.0126
3 1.1544 nan 0.1000 0.0222
4 1.0916 nan 0.1000 0.0210
5 1.0490 nan 0.1000 0.0053
6 1.0013 nan 0.1000 0.0108
7 0.9655 nan 0.1000 0.0052
8 0.9278 nan 0.1000 0.0107
9 0.8945 nan 0.1000 0.0033
10 0.8509 nan 0.1000 0.0068
20 0.6095 nan 0.1000 0.0009
40 0.3509 nan 0.1000 -0.0024
60 0.2016 nan 0.1000 -0.0007
80 0.1242 nan 0.1000 -0.0008
100 0.0772 nan 0.1000 -0.0001
Using 200 trees...
Using 200 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.2165 nan 0.1000 0.0214
2 1.1549 nan 0.1000 0.0139
3 1.0910 nan 0.1000 0.0218
4 1.0410 nan 0.1000 0.0148
5 0.9962 nan 0.1000 0.0101
6 0.9523 nan 0.1000 0.0135
7 0.9185 nan 0.1000 0.0034
8 0.8832 nan 0.1000 -0.0008
9 0.8450 nan 0.1000 0.0115
10 0.8050 nan 0.1000 0.0063
20 0.5656 nan 0.1000 -0.0026
40 0.2911 nan 0.1000 0.0003
60 0.1729 nan 0.1000 -0.0007
80 0.1000 nan 0.1000 0.0003
100 0.0654 nan 0.1000 -0.0007
Using 200 trees...
Using 200 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.1998 nan 0.1000 0.0156
2 1.1546 nan 0.1000 0.0165
3 1.1114 nan 0.1000 0.0087
4 1.0538 nan 0.1000 0.0192
5 1.0260 nan 0.1000 -0.0002
6 0.9916 nan 0.1000 0.0093
7 0.9638 nan 0.1000 0.0012
8 0.9342 nan 0.1000 0.0083
9 0.9028 nan 0.1000 0.0012
10 0.8712 nan 0.1000 0.0103
20 0.6633 nan 0.1000 0.0020
40 0.4204 nan 0.1000 -0.0012
60 0.2712 nan 0.1000 0.0010
80 0.1832 nan 0.1000 0.0005
100 0.1291 nan 0.1000 -0.0002
Using 200 trees...
Using 200 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.2268 nan 0.1000 0.0256
2 1.1605 nan 0.1000 0.0203
3 1.0940 nan 0.1000 0.0257
4 1.0399 nan 0.1000 0.0108
5 1.0042 nan 0.1000 0.0009
6 0.9574 nan 0.1000 0.0118
7 0.8991 nan 0.1000 0.0209
8 0.8459 nan 0.1000 0.0138
9 0.8152 nan 0.1000 0.0074
10 0.7727 nan 0.1000 0.0085
20 0.5375 nan 0.1000 -0.0013
40 0.2890 nan 0.1000 -0.0013
60 0.1673 nan 0.1000 -0.0011
80 0.1029 nan 0.1000 -0.0006
100 0.0608 nan 0.1000 -0.0002
120 0.0384 nan 0.1000 -0.0003
140 0.0238 nan 0.1000 -0.0002
160 0.0159 nan 0.1000 -0.0002
180 0.0105 nan 0.1000 -0.0001
200 0.0065 nan 0.1000 -0.0001
220 0.0042 nan 0.1000 -0.0000
240 0.0028 nan 0.1000 -0.0000
260 0.0018 nan 0.1000 -0.0000
280 0.0011 nan 0.1000 -0.0000
300 0.0007 nan 0.1000 -0.0000
320 0.0005 nan 0.1000 0.0000
340 0.0003 nan 0.1000 -0.0000
360 0.0002 nan 0.1000 0.0000
380 0.0001 nan 0.1000 -0.0000
400 0.0001 nan 0.1000 0.0000
420 0.0000 nan 0.1000 0.0000
440 0.0000 nan 0.1000 -0.0000
460 0.0000 nan 0.1000 -0.0000
480 0.0000 nan 0.1000 0.0000
500 0.0000 nan 0.1000 -0.0000
Using 200 trees...
Using 200 trees...
Iter TrainDeviance ValidDeviance StepSize Improve
1 1.2663 nan 0.1000 0.0129
2 1.2227 nan 0.1000 0.0084
3 1.1793 nan 0.1000 0.0098
4 1.1333 nan 0.1000 0.0144
5 1.0936 nan 0.1000 0.0104
6 1.0592 nan 0.1000 0.0071
7 1.0238 nan 0.1000 0.0092
8 0.9995 nan 0.1000 -0.0000
9 0.9593 nan 0.1000 0.0130
10 0.9284 nan 0.1000 0.0085
20 0.7129 nan 0.1000 0.0013
40 0.4611 nan 0.1000 0.0015
60 0.3163 nan 0.1000 -0.0013
80 0.2273 nan 0.1000 -0.0011
100 0.1598 nan 0.1000 0.0002
120 0.1138 nan 0.1000 -0.0002
140 0.0839 nan 0.1000 0.0001
160 0.0615 nan 0.1000 0.0001
180 0.0448 nan 0.1000 -0.0002
200 0.0336 nan 0.1000 -0.0000
Using 200 trees...
Using 200 trees...
# ROC curve
roc_c <- roc_curve(gbm_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["gbm_model_ra"]][[model_name]] <- gbm_model$model_summary
supplements_models[["roc_cs"]][["gbm_model_ra"]][[model_name]] <- gbm_model$kfold_rocobjs
# see the results
gbm_model$model_summary %>% t()
[,1]
n.trees 200.0000000
interaction.depth 5.0000000
shrinkage 0.1000000
n.minobsinnode 10.0000000
auc 1.0000000
auc_optimism_corrected 0.5026238
auc_optimism_corrected_CIL 0.4422162
auc_optimism_corrected_CIU 0.5990286
accuracy 1.0000000
accuracy_optimism_corrected 0.5550849
accuracy_optimism_corrected_CIL 0.4913026
accuracy_optimism_corrected_CIU 0.6206456
roc_c
# Stack each model type's per-comparison summary rows into one data frame,
# then export the whole collection as a single workbook (one sheet per
# model type) into the overfitting-check results folder.
models_list <- list()
for (model_name in names(supplements_models$models_summ)) {
  models_list[[model_name]] <-
    do.call(rbind, supplements_models$models_summ[[model_name]])
}
write.xlsx(models_list,
           file=file.path(path,paste0("supplements_models_",segment,".xlsx")),
           rowNames=TRUE)
# Switch the analysis from terminal ileum to the colon segment; results
# are written under the Q1 overfitting-check folder.
segment="colon"
path = "../results/Q1/models_overfitting_check"
model="enet"
level="ASV"
# NOTE(review): this first group/comparison_name pair is immediately
# overwritten two lines below and is never used -- it looks like a
# leftover from a skipped "pre_ltx vs healthy" chunk; confirm and remove
# if unintended.
group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
# Active comparison for this chunk: pre- vs post-transplant samples.
group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
# Model identifier used for file names and result-list keys.
model_name <- paste(comparison_name,level,segment)
# prepare the data
# Subset the colon ASV table to the two groups being compared and apply
# the "ml_clr" preparation (presumably a CLR transform for ML input --
# confirm in custom_functions.R); patient = TRUE keeps the patient
# identifier so resampling can respect within-patient clustering.
filt_colon_uni_data <- binomial_prep(colon_asv_tab,
colon_taxa_tab,
colon_metadata,
group, usage="ml_clr",
patient = TRUE)
Removing 1157 ASV(s)
Removing 52 ASV(s)
# fit the model
# Elastic-net binomial classifier with N=10 cluster-aware ("atypboot")
# resamples clustered on Patient; overfitting_check = TRUE requests the
# optimism-corrected AUC/accuracy reported in the summary below.
enet_model <- glmnet_binomial(filt_colon_uni_data,
sample_method = "atypboot",
outcome="Group", N=10,
clust_var="Patient",
reuse=FALSE,
file=model_name,
Q="Q1",
overfitting_check = TRUE)
# ROC
# Build the k-fold ROC curve plot for this comparison.
roc_c <- roc_curve(enet_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
models_summ[[model_name]] <- enet_model$model_summary
models_cm[[model_name]] <- enet_model$conf_matrices$original
roc_cs[[model_name]] <- enet_model$kfold_rocobjs
betas[[model_name]] <- as.matrix(enet_model$betas)
# see the results
enet_model$model_summary %>% t()
[,1]
alpha 0.0000000
lambda 67.4876631
auc 0.6741089
auc_czech 0.7313255
auc_no 0.6465953
auc_optimism_corrected 0.4821532
auc_optimism_corrected_CIL 0.4309691
auc_optimism_corrected_CIU 0.5262457
accuracy 0.5826377
accuracy_czech NaN
accuracy_no 0.5787966
accuracy_optimism_corrected 0.5552662
accuracy_optimism_corrected_CIL 0.5068982
accuracy_optimism_corrected_CIU 0.6114882
enet_model$conf_matrices
$original
0
0 349 0
1 250 0
$czech
0
0 147 0
1 103 0
$no
0
0 202 0
1 147 0
enet_model$plot
roc_c
group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)
# prepare the data
filt_colon_uni_data <- binomial_prep(colon_asv_tab,
colon_taxa_tab,
colon_metadata,
group,
usage="ml_clr",
patient = TRUE)
Removing 1096 ASV(s)
Removing 50 ASV(s)
# fit the model
enet_model <- glmnet_binomial(filt_colon_uni_data,
sample_method = "atypboot",
outcome="Group",
N=10,
clust_var="Patient",
reuse=FALSE,
file=model_name,
Q="Q1",
overfitting_check = TRUE)
# ROC
roc_c <- roc_curve(enet_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
models_summ[[model_name]] <- enet_model$model_summary
models_cm[[model_name]] <- enet_model$conf_matrices$original
roc_cs[[model_name]] <- enet_model$kfold_rocobjs
betas[[model_name]] <- as.matrix(enet_model$betas)
# see the results
enet_model$model_summary %>% t()
[,1]
alpha 0.0000000
lambda 51.8414603
auc 0.7602734
auc_czech 0.7929631
auc_no 0.7184415
auc_optimism_corrected 0.4945404
auc_optimism_corrected_CIL 0.4553473
auc_optimism_corrected_CIU 0.5558614
accuracy 0.6843137
accuracy_czech NaN
accuracy_no 0.6666667
accuracy_optimism_corrected 0.6705656
accuracy_optimism_corrected_CIL 0.6245660
accuracy_optimism_corrected_CIU 0.7041182
enet_model$conf_matrices
$original
1
0 161 0
1 349 0
$czech
1
0 92 0
1 211 0
$no
1
0 69 0
1 138 0
enet_model$plot
roc_c
level="genus"
Aggregate taxa
genus_data <- aggregate_taxa(colon_asv_tab,
colon_taxa_tab,
taxonomic_level = level)
colon_genus_tab <- genus_data[[1]]
colon_genus_taxa_tab <- genus_data[[2]]
group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)
# prepare the data
filt_colon_uni_data <- binomial_prep(colon_genus_tab,
colon_genus_taxa_tab,
colon_metadata,
group,
usage="ml_clr",
patient = TRUE)
Removing 135 ASV(s)
Removing 10 ASV(s)
# fit the model
enet_model <- glmnet_binomial(filt_colon_uni_data,
sample_method = "atypboot",
outcome="Group",
N=10,
clust_var="Patient",
reuse=FALSE,
file=model_name,
Q="Q1",overfitting_check = TRUE)
# ROC
roc_c <- roc_curve(enet_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
models_summ[[model_name]] <- enet_model$model_summary
models_cm[[model_name]] <- enet_model$conf_matrices$original
roc_cs[[model_name]] <- enet_model$kfold_rocobjs
betas[[model_name]] <- as.matrix(enet_model$betas)
# see the results
enet_model$model_summary %>% t()
[,1]
alpha 0.80000000
lambda 0.07056436
auc 0.50000000
auc_czech 0.50000000
auc_no 0.50000000
auc_optimism_corrected 0.45284404
auc_optimism_corrected_CIL 0.39551409
auc_optimism_corrected_CIU 0.49948653
accuracy 0.60827251
accuracy_czech NaN
accuracy_no 0.60218978
accuracy_optimism_corrected 0.57337784
accuracy_optimism_corrected_CIL 0.52482349
accuracy_optimism_corrected_CIU 0.62227176
enet_model$conf_matrices
$original
1
0 161 0
1 250 0
$czech
1
0 52 0
1 85 0
$no
1
0 109 0
1 165 0
enet_model$plot
roc_c
group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)
# prepare the data
filt_colon_uni_data <- binomial_prep(colon_genus_tab,
colon_genus_taxa_tab,
colon_metadata,
group,
usage="ml_clr",
patient = TRUE)
Removing 17 ASV(s)
Removing 10 ASV(s)
# fit the model
enet_model <- glmnet_binomial(filt_colon_uni_data,
sample_method = "atypboot",
outcome="Group",
N=10,
clust_var="Patient",
reuse=FALSE,
file=model_name,
Q="Q1",
overfitting_check = TRUE)
# ROC
roc_c <- roc_curve(enet_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
models_summ[[model_name]] <- enet_model$model_summary
models_cm[[model_name]] <- enet_model$conf_matrices$original
roc_cs[[model_name]] <- enet_model$kfold_rocobjs
betas[[model_name]] <- as.matrix(enet_model$betas)
# see the results
enet_model$model_summary %>% t()
[,1]
alpha 0.80000000
lambda 0.07324714
auc 0.50000000
auc_czech 0.50000000
auc_no 0.50000000
auc_optimism_corrected 0.49333832
auc_optimism_corrected_CIL 0.47331491
auc_optimism_corrected_CIU 0.52415390
accuracy 0.58333333
accuracy_czech NaN
accuracy_no 0.56446991
accuracy_optimism_corrected 0.56083992
accuracy_optimism_corrected_CIL 0.49608392
accuracy_optimism_corrected_CIU 0.60465613
enet_model$conf_matrices
$original
0
0 350 0
1 250 0
$czech
0
0 153 0
1 98 0
$no
0
0 197 0
1 152 0
enet_model$plot
roc_c
group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)
# prepare the data
filt_colon_uni_data <- binomial_prep(colon_genus_tab,
colon_genus_taxa_tab,
colon_metadata,
group,
usage="ml_clr",
patient = TRUE)
Removing 71 ASV(s)
Removing 5 ASV(s)
# fit the model
enet_model <- glmnet_binomial(filt_colon_uni_data,
sample_method = "atypboot",
outcome="Group",
N=10,
clust_var="Patient",
reuse=FALSE,
file=model_name,
Q="Q1",
overfitting_check = TRUE)
Warning: from glmnet C++ code (error code -100); Convergence for 100th lambda value not reached after maxit=100000 iterations; solutions for larger lambdas returned
# ROC
roc_c <- roc_curve(enet_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
models_summ[[model_name]] <- enet_model$model_summary
models_cm[[model_name]] <- enet_model$conf_matrices$original
roc_cs[[model_name]] <- enet_model$kfold_rocobjs
betas[[model_name]] <- as.matrix(enet_model$betas)
# see the results
enet_model$model_summary %>% t()
[,1]
alpha 0.0000000
lambda 62.4494689
auc 0.7251109
auc_czech 0.7409982
auc_no 0.7004115
auc_optimism_corrected 0.4714759
auc_optimism_corrected_CIL 0.4071017
auc_optimism_corrected_CIU 0.5395775
accuracy 0.6849315
accuracy_czech NaN
accuracy_no 0.6521739
accuracy_optimism_corrected 0.6708665
accuracy_optimism_corrected_CIL 0.6086170
accuracy_optimism_corrected_CIU 0.7311268
enet_model$conf_matrices
$original
1
0 161 0
1 350 0
$czech
1
0 89 0
1 215 0
$no
1
0 72 0
1 135 0
enet_model$plot
roc_c
models_summ_df_colon <- do.call(rbind,
models_summ[grep(segment,names(models_summ),value = TRUE)])
write.csv(models_summ_df_colon,file.path(path,paste0("elastic_net_",segment,".csv")))
model="knn"
level="ASV"
pre_ltx vs healthy
group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
pre_ltx vs post_ltx
group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)
# prepare the data
filt_colon_uni_data <- binomial_prep(colon_asv_tab,
colon_taxa_tab,
colon_metadata,
group, usage="ml_clr",
patient = TRUE)
Removing 1157 ASV(s)
Removing 52 ASV(s)
# fit the model
knn_model <- knn_binomial(filt_colon_uni_data,
sample_method = "atypboot",
outcome="Group",
N=10,
clust_var="Patient",
reuse=FALSE,
file=model_name,
Q="Q1",
overfitting_check = TRUE)
# ROC curve
roc_c <- roc_curve(knn_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["knn_model"]][[model_name]] <- knn_model$model_summary
supplements_models[["roc_cs"]][["knn_model"]][[model_name]] <- knn_model$kfold_rocobjs
# see the results
knn_model$model_summary %>% t()
[,1]
k 24.0000000
auc 0.6464814
auc_optimism_corrected 0.4869012
auc_optimism_corrected_CIL 0.4479849
auc_optimism_corrected_CIU 0.5201409
accuracy 0.5993322
accuracy_optimism_corrected 0.5568528
accuracy_optimism_corrected_CIL 0.5195593
accuracy_optimism_corrected_CIU 0.5805193
roc_c
post_ltx vs healthy
group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)
# prepare the data
filt_colon_uni_data <- binomial_prep(colon_asv_tab,
colon_taxa_tab,
colon_metadata,
group, usage="ml_clr",
patient = TRUE)
Removing 1096 ASV(s)
Removing 50 ASV(s)
# fit the model
knn_model <- knn_binomial(filt_colon_uni_data,
sample_method = "atypboot",
outcome="Group",
N=10,
clust_var="Patient",
reuse=FALSE,
file=model_name,
Q="Q1",
overfitting_check = TRUE)
# ROC curve
roc_c <- roc_curve(knn_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["knn_model"]][[model_name]] <- knn_model$model_summary
supplements_models[["roc_cs"]][["knn_model"]][[model_name]] <- knn_model$kfold_rocobjs
# see the results
knn_model$model_summary %>% t()
[,1]
k 27.0000000
auc 0.5670594
auc_optimism_corrected 0.4409962
auc_optimism_corrected_CIL 0.3668215
auc_optimism_corrected_CIU 0.5009250
accuracy 0.6882353
accuracy_optimism_corrected 0.6590241
accuracy_optimism_corrected_CIL 0.6378461
accuracy_optimism_corrected_CIU 0.6852319
roc_c
level="genus"
Aggregate taxa
genus_data <- aggregate_taxa(colon_asv_tab,
colon_taxa_tab,
taxonomic_level = level)
colon_genus_tab <- genus_data[[1]]
colon_genus_taxa_tab <- genus_data[[2]]
pre_ltx vs healthy
group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)
# prepare the data
filt_colon_uni_data <- binomial_prep(colon_genus_tab,
colon_genus_taxa_tab,
colon_metadata,
group,
usage="ml_clr",
patient = TRUE)
Removing 135 ASV(s)
Removing 10 ASV(s)
# fit the model
knn_model <- knn_binomial(filt_colon_uni_data,
sample_method = "atypboot",
outcome="Group",
N=10,
clust_var="Patient",
reuse=FALSE,
file=model_name,
Q="Q1",
overfitting_check = TRUE)
# ROC curve
roc_c <- roc_curve(knn_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["knn_model"]][[model_name]] <- knn_model$model_summary
supplements_models[["roc_cs"]][["knn_model"]][[model_name]] <- knn_model$kfold_rocobjs
# see the results
knn_model$model_summary %>% t()
[,1]
k 29.0000000
auc 0.6010435
auc_optimism_corrected 0.4871584
auc_optimism_corrected_CIL 0.4329184
auc_optimism_corrected_CIU 0.5517731
accuracy 0.6107056
accuracy_optimism_corrected 0.5527559
accuracy_optimism_corrected_CIL 0.5087752
accuracy_optimism_corrected_CIU 0.6054608
roc_c
pre_ltx vs post_ltx
group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)
# prepare the data
filt_colon_uni_data <- binomial_prep(colon_genus_tab,
colon_genus_taxa_tab,
colon_metadata,
group,
usage="ml_clr",
patient = TRUE)
Removing 17 ASV(s)
Removing 10 ASV(s)
# fit the model
knn_model <- knn_binomial(filt_colon_uni_data,
sample_method = "atypboot",
outcome="Group",
N=10,
clust_var="Patient",
reuse=FALSE,
file=model_name,
Q="Q1",
overfitting_check = TRUE)
# ROC curve
roc_c <- roc_curve(knn_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["knn_model"]][[model_name]] <- knn_model$model_summary
supplements_models[["roc_cs"]][["knn_model"]][[model_name]] <- knn_model$kfold_rocobjs
# see the results
knn_model$model_summary %>% t()
[,1]
k 30.0000000
auc 0.6043829
auc_optimism_corrected 0.5211546
auc_optimism_corrected_CIL 0.4650463
auc_optimism_corrected_CIU 0.5706488
accuracy 0.5883333
accuracy_optimism_corrected 0.5710544
accuracy_optimism_corrected_CIL 0.5116544
accuracy_optimism_corrected_CIU 0.6208351
roc_c
post_ltx vs healthy
group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)
# prepare the data
filt_colon_uni_data <- binomial_prep(colon_genus_tab,
colon_genus_taxa_tab,
colon_metadata,
group,
usage="ml_clr",
patient = TRUE)
Removing 71 ASV(s)
Removing 5 ASV(s)
# fit the model
knn_model <- knn_binomial(filt_colon_uni_data,
sample_method = "atypboot",
outcome="Group",
N=10,
clust_var="Patient",
reuse=FALSE,
file=model_name,
Q="Q1",
overfitting_check = TRUE)
# ROC curve
roc_c <- roc_curve(knn_model, group)
Warning: Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["knn_model"]][[model_name]] <- knn_model$model_summary
supplements_models[["roc_cs"]][["knn_model"]][[model_name]] <- knn_model$kfold_rocobjs
# see the results
knn_model$model_summary %>% t()
[,1]
k 30.0000000
auc 0.6109583
auc_optimism_corrected 0.4960179
auc_optimism_corrected_CIL 0.4322515
auc_optimism_corrected_CIU 0.5666104
accuracy 0.6868885
accuracy_optimism_corrected 0.6426930
accuracy_optimism_corrected_CIL 0.5991299
accuracy_optimism_corrected_CIU 0.6791946
roc_c
model="rf"
level="ASV"
pre_ltx vs healthy
group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)
# prepare the data
filt_colon_uni_data <- binomial_prep(colon_asv_tab,
colon_taxa_tab,
colon_metadata,
group, usage="ml_clr",
patient = TRUE)
Removing 2138 ASV(s)
Removing 70 ASV(s)
# fit the model
rf_model <- rf_binomial(filt_colon_uni_data,
sample_method = "atypboot",
outcome="Group",
N=10,
clust_var="Patient",
reuse=FALSE,
file=model_name,
Q="Q1",
overfitting_check = TRUE)
pre_ltx vs post_ltx
group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)
# prepare the data
filt_colon_uni_data <- binomial_prep(colon_asv_tab,
colon_taxa_tab,
colon_metadata,
group, usage="ml_clr",
patient = TRUE)
Removing 1157 ASV(s)
Removing 52 ASV(s)
# fit the model
# Random-forest binomial classifier, resampled with the cluster-aware
# "atypboot" scheme (clustered on Patient, N = 10 resamples).
# FIX(consistency): every sibling fit in this overfitting-check script
# (gbm, enet, knn, and the first rf fit) passes overfitting_check = TRUE;
# it was missing here, so the optimism-corrected metrics would not be
# computed for this comparison. Added for consistency.
rf_model <- rf_binomial(filt_colon_uni_data,
                        sample_method = "atypboot",
                        outcome="Group",
                        N=10,
                        clust_var="Patient",
                        reuse=FALSE,
                        file=model_name,
                        Q="Q1",
                        overfitting_check = TRUE)
# ROC curve
roc_c <- roc_curve(rf_model, group)
Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, :
Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["rf_model"]][[model_name]] <- rf_model$model_summary
supplements_models[["roc_cs"]][["rf_model"]][[model_name]] <- rf_model$kfold_rocobjs
# see the results
rf_model$model_summary %>% t()
[,1]
mtry "143"
splitrule "gini"
min.node.size "2"
auc "1"
auc_optimism_corrected "0.9599069"
auc_optimism_corrected_CIL "0.9178946"
auc_optimism_corrected_CIU "0.9905348"
accuracy "1"
accuracy_optimism_corrected "0.8984082"
accuracy_optimism_corrected_CIL "0.8416505"
accuracy_optimism_corrected_CIU "0.9411982"
roc_c
post_ltx vs healthy
group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)
# prepare the data
filt_colon_uni_data <- binomial_prep(colon_asv_tab,
colon_taxa_tab,
colon_metadata,
group, usage="ml_clr",
patient = TRUE)
Removing 1096 ASV(s)
Removing 50 ASV(s)
# fit the model
# Random-forest binomial classifier, resampled with the cluster-aware
# "atypboot" scheme (clustered on Patient, N = 10 resamples).
# FIX(consistency): every sibling fit in this overfitting-check script
# (gbm, enet, knn, and the first rf fit) passes overfitting_check = TRUE;
# it was missing here, so the optimism-corrected metrics would not be
# computed for this comparison. Added for consistency.
rf_model <- rf_binomial(filt_colon_uni_data,
                        sample_method = "atypboot",
                        outcome="Group",
                        N=10,
                        clust_var="Patient",
                        reuse=FALSE,
                        file=model_name,
                        Q="Q1",
                        overfitting_check = TRUE)
Aggregate taxa
pre_ltx vs healthy
pre_ltx vs post_ltx
post_ltx vs healthy
pre_ltx vs healthy
pre_ltx vs post_ltx
post_ltx vs healthy
Aggregate taxa
pre_ltx vs healthy
pre_ltx vs post_ltx
post_ltx vs healthy
pre_ltx vs healthy
pre_ltx vs post_ltx
post_ltx vs healthy
Aggregate taxa
pre_ltx vs healthy
pre_ltx vs post_ltx
post_ltx vs healthy
pre_ltx vs healthy
pre_ltx vs post_ltx
post_ltx vs healthy
Aggregate taxa
pre_ltx vs healthy
pre_ltx vs post_ltx
post_ltx vs healthy
pre_ltx vs healthy
pre_ltx vs post_ltx
post_ltx vs healthy
Aggregate taxa
pre_ltx vs healthy
pre_ltx vs post_ltx
post_ltx vs healthy
pre_ltx vs healthy
pre_ltx vs post_ltx
post_ltx vs healthy
Aggregate taxa
pre_ltx vs healthy
pre_ltx vs post_ltx
post_ltx vs healthy